class ZK:
    """Thin convenience wrapper around a started ``KazooClient``.

    The wrapper starts the client on construction and stops it when the
    wrapper is garbage collected.
    """

    # Class-level default: lets ``__del__`` distinguish "never connected"
    # from a live client when ``__init__`` failed part-way through.
    client = None

    def __init__(self, zk_host):
        """Create a client for ``zk_host`` and start it immediately."""
        self.client = KazooClient(zk_host)
        self.client.start()

    def __del__(self):
        # ``__init__`` may have raised before ``self.client`` was assigned;
        # in that case the class default (None) is what we see here.
        client = self.client
        if client is not None:
            client.stop()

    def get_node(self, path):
        """Return a ``ZKNode`` for ``path``, or None if the znode is absent."""
        if not self.client.exists(path):
            return None
        return ZKNode(path, self)

    def create_node(self, path):
        """Ensure ``path`` exists and return its ``ZKNode``."""
        self.client.ensure_path(path)
        return self.get_node(path)

    def get_transaction(self):
        """Return a new transaction object from the underlying client."""
        return self.client.transaction()

    def get_lock(self, path, id=None):
        """Return a lock recipe rooted at ``<path>/lock``.

        ``id`` is passed through to the client's ``Lock`` factory unchanged.
        """
        return self.client.Lock(path + "/lock", id)

    def has_lock(self, path):
        """Return True if ``<path>/lock`` exists and has at least one child."""
        lock_path = path + "/lock"
        if not self.client.exists(lock_path):
            return False
        return len(self.client.get_children(lock_path)) > 0
class Callback:
    """Deliver finished tasks to their HTTP callback and remove them.

    Task ids are read from the children of ``/<root>/callback``; the task
    payload lives at ``/<root>/tasks/<task_id>`` with one child per target
    under ``.../targets``.
    """

    def __init__(self, zk_hosts, zk_root):
        self.zk = KazooClient(zk_hosts)
        self.root = zk_root
        # Set by shutdown(); stops both the children watch and compensate().
        self.event = threading.Event()
        # Task ids seen by the last children-watch notification.
        self.tasks = {}

    def get_task(self, task_id):
        """Load the task JSON and attach a ``targets`` mapping of name -> status."""
        node = '/{}/tasks/{}'.format(self.root, task_id)
        data, _ = self.zk.get(node)
        task = json.loads(data.decode())

        # The targets live under the task node itself.
        targets_node = '{}/targets'.format(node)
        targets = {}
        for target in self.zk.get_children(targets_node):
            status, _ = self.zk.get('{}/{}'.format(targets_node, target))
            targets[target] = status.decode()
        task['targets'] = targets
        return task

    def delete(self, task_id):
        """Delete the callback marker and the task node in one transaction."""
        callback_node = '/{}/callback/{}'.format(self.root, task_id)
        task_node = '/{}/tasks/{}'.format(self.root, task_id)
        tx = self.zk.transaction()
        tx.delete(callback_node)
        tx.delete(task_node)
        tx.commit()

    def run(self, task_id):
        """POST the task to its callback URL, then delete it.

        Any failure (including the task node having vanished in the meantime)
        is logged and swallowed so one bad task cannot stop the watcher or
        the compensation loop.
        """
        try:
            task = self.get_task(task_id)
            # NOTE(review): the HTTP status is not checked, so the task is
            # deleted even when the callback endpoint answers with an error.
            requests.post(task['callback'], json=task)
            self.delete(task_id)
        except Exception as e:
            logging.error(e)

    def watch_tasks(self, tasks):
        """Children-watch callback: run every task id not seen last time.

        Returns False once shutdown was requested.
        """
        for task_id in set(tasks).difference(self.tasks):
            self.run(task_id)
        self.tasks = tasks
        return not self.event.is_set()

    def watch(self):
        """Register ``watch_tasks`` on the callback node's children."""
        ChildrenWatch(self.zk, '/{}/callback'.format(self.root), self.watch_tasks)

    def compensate(self):
        """Re-run every pending task every 10 seconds until shutdown."""
        while not self.event.is_set():
            for task in self.zk.get_children('/{}/callback'.format(self.root)):
                self.run(task)
            self.event.wait(10)

    def start(self):
        """Connect, install the watch and block in the compensation loop."""
        self.zk.start()
        self.watch()
        self.compensate()

    def shutdown(self):
        """Signal the loops to stop and release the client."""
        self.event.set()
        # close() is only valid on a stopped client.
        self.zk.stop()
        self.zk.close()
def main():
    """Walk through the basic kazoo operations against a local ZooKeeper.

    Creates a small tree under ``/my``, registers children/data watches,
    reads, updates and deletes nodes, runs one transaction, and finally
    removes the tree again. The client is always stopped, even when one of
    the steps raises.
    """
    zk = KazooClient(hosts='127.0.0.1:2181')
    zk.start()
    try:
        @zk.add_listener
        def my_listener(state):
            if state == KazooState.LOST:
                print("LOST")
            elif state == KazooState.SUSPENDED:
                print("SUSPENDED")
            else:
                print("Connected")

        # Creating Nodes
        # Ensure a path, create if necessary
        zk.ensure_path("/my/favorite")
        # Create a node with data
        zk.create("/my/favorite/node", b"")
        zk.create("/my/favorite/node/a", b"A")

        # Reading Data
        # Determine if a node exists
        if zk.exists("/my/favorite"):
            print("/my/favorite is existed")

        # The watch function is called immediately, and on every change.
        @zk.ChildrenWatch("/my/favorite/node")
        def watch_children(children):
            print("Children are now: %s" % children)

        @zk.DataWatch("/my/favorite/node")
        def watch_node(data, stat):
            # The watch fires with None values once the node is deleted
            # (which the cleanup at the end of this function does).
            if stat is None:
                return
            print("Version: %s, data: %s" % (stat.version, data.decode("utf-8")))

        # Print the version of a node and its data
        data, stat = zk.get("/my/favorite/node")
        print("Version: %s, data: %s" % (stat.version, data.decode("utf-8")))

        # List the children
        children = zk.get_children("/my/favorite/node")
        print("There are %s children with names %s" % (len(children), children))

        # Updating Data
        zk.set("/my/favorite", b"some data")

        # Deleting Nodes
        zk.delete("/my/favorite/node/a")

        # Transactions
        transaction = zk.transaction()
        transaction.check('/my/favorite/node', version=-1)
        transaction.create('/my/favorite/node/b', b"B")
        results = transaction.commit()
        print("Transaction results is %s" % results)

        zk.delete("/my/favorite/node/b")
        zk.delete("/my", recursive=True)
        # Give the watch callbacks a moment to run before disconnecting.
        time.sleep(2)
    finally:
        zk.stop()
class ZookeeperClient:
    """Small facade over ``KazooClient`` configured from a host/port mapping."""

    def __init__(self, configuration):
        """Connect to ``configuration['host']``:``configuration['port']``."""
        self.host = configuration['host']
        self.port = configuration['port']
        self.client = KazooClient(hosts='{}:{}'.format(self.host, self.port))
        self.client.start()

    def create_node(self, path, data):
        """Create ``path`` holding ``data``."""
        self.client.create(path, data)

    def get_node(self, path):
        """Return the data stored at ``path`` (the stat is discarded)."""
        data, _stat = self.client.get(path)
        return data

    def get_children(self, path):
        """Return the child names of ``path``."""
        return self.client.get_children(path)

    def get_children_count(self, path):
        """Return the number of children of ``path``, or -1 if it is missing."""
        try:
            return len(self.get_children(path))
        except NoNodeError:
            return -1

    @contextmanager
    def transaction(self):
        """Yield a transaction and commit it when the ``with`` body succeeds.

        If the body raises, the exception propagates and the partially built
        transaction is NOT committed. After a commit, an error is logged when
        any per-operation result is an exception.
        """
        transaction = self.client.transaction()
        yield transaction
        results = transaction.commit()
        # commit() reports failures as exception instances in the result list.
        if any(isinstance(result, Exception) for result in results):
            logging.error("Zookeeper transaction failed!")

    def delete_node(self, path):
        """Delete ``path``; errors from the client propagate."""
        self.client.delete(path)

    def safe_delete_node(self, path):
        """Delete ``path``; return False instead of raising on the two
        expected failures (version mismatch, node already gone)."""
        try:
            self.client.delete(path)
            return True
        except BadVersionError:
            logging.warning("Bad Version Error")
            return False
        except NoNodeError:
            logging.warning("No Node Error")
            return False

    def close(self):
        """Stop the underlying client."""
        self.client.stop()
def main():
    """Walk through the basic kazoo operations against a ZooKeeper server.

    Creates a small tree under ``/my``, registers children/data watches,
    reads, updates and deletes nodes, runs one transaction, and finally
    removes the tree again. The client is always stopped, even when one of
    the steps raises.
    """
    zk = KazooClient(hosts='10.0.0.130:2182')
    zk.add_listener(my_listener)
    zk.start()
    try:
        zk.ensure_path("/my/favorite")
        zk.create("/my/favorite/node", b"")
        zk.create("/my/favorite/node/a", b"A")

        if zk.exists("/my/favorite"):
            print("/my/favorite is existed")

        @zk.ChildrenWatch("/my/favorite/node")
        def watch_children(children):
            print("Children are now: %s" % children)

        @zk.DataWatch("/my/favorite/node")
        def watch_node(data, stat):
            # The watch fires with None values once the node is deleted
            # (which the cleanup at the end of this function does).
            if stat is None:
                return
            print("Version: %s, data: %s" % (stat.version, data.decode("utf-8")))

        data, stat = zk.get("/my/favorite/node")
        print("Version: %s, data: %s" % (stat.version, data.decode("utf-8")))

        # List the children
        children = zk.get_children("/my/favorite/node")
        print("There are %s children with names %s" % (len(children), children))

        # Updating Data
        zk.set("/my/favorite", b"some data")

        # Deleting Nodes
        zk.delete("/my/favorite/node/a")

        # Transactions
        transaction = zk.transaction()
        transaction.check('/my/favorite/node', version=-1)
        transaction.create('/my/favorite/node/b', b"B")
        results = transaction.commit()
        print("Transaction results is %s" % results)

        zk.delete("/my/favorite/node/b")
        zk.delete("/my", recursive=True)
        # Give the watch callbacks a moment to run before disconnecting.
        time.sleep(2)
    finally:
        zk.stop()
def run(self):
    """Consume from Kafka and mirror the highest seen offsets into ZooKeeper.

    For every configured topic/partition a znode
    ``/consumers/my_consumer/offsets/<topic>/<partition>`` is created
    (initialised to ``b"0"``) if missing. Then, until the shutdown event is
    set, each poll's highest offset per partition is written to that znode
    in a single transaction. The ZooKeeper client is stopped on exit, even
    when an error interrupts the loop.
    """
    zk_path_topic_tmpl = '/consumers/my_consumer/offsets/'
    zk_path_partition_tmpl = zk_path_topic_tmpl + '{topic}/{partition}'

    zk_conn = KazooClient(self.zk_connect_str, timeout=10)
    zk_conn.start()
    try:
        for topic in self.topics:
            for partition in self.partitions:
                node_path = zk_path_partition_tmpl.format(topic=topic, partition=partition)
                if not zk_conn.exists(node_path):
                    zk_conn.ensure_path(node_path)
                    zk_conn.set(node_path, b"0")

        consumer = KafkaConsumer(
            bootstrap_servers=[self.kafka_connect_str],
            group_id="my_consumer",
            auto_offset_reset='earliest',
            enable_auto_commit=False,
        )
        consumer.subscribe(self.topics)

        while not self._shutdown_event.is_set():
            response = consumer.poll(timeout_ms=500, max_records=10)
            zk_trans = zk_conn.transaction()
            for tp, records in iteritems(response):
                if not records:
                    continue
                # Offset 0 is a valid offset, so test for "no records"
                # rather than for a falsy offset.
                offset = max(record.offset for record in records)
                # Use the same path template the znodes were created with,
                # so the topic is part of the path.
                zk_trans.set_data(
                    zk_path_partition_tmpl.format(topic=tp.topic, partition=tp.partition),
                    # Decimal text, not ``bytes(n)`` (n zero bytes on Python 3).
                    str(offset).encode('ascii'))
            zk_trans.commit()
    finally:
        zk_conn.stop()
def run(self):
    """Consume from Kafka and mirror the highest seen offsets into ZooKeeper.

    For every topic in ``TOPICS`` and partition in ``PARTITIONS`` a znode
    ``/consumers/my_consumer/offsets/<topic>/<partition>`` is created
    (initialised to ``"0"``) if missing. Then, until ``SHUTDOWN`` is set,
    each poll's highest offset per partition is written to that znode in a
    single transaction. The ZooKeeper client is stopped on exit, even when
    an error interrupts the loop.
    """
    zk_path_topic_tmpl = '/consumers/my_consumer/offsets/'
    zk_path_partition_tmpl = zk_path_topic_tmpl + '{topic}/{partition}'

    zk_conn = KazooClient(zk_instance['zk_connect_str'], timeout=10)
    zk_conn.start()
    try:
        for topic in TOPICS:
            for partition in PARTITIONS:
                node_path = zk_path_partition_tmpl.format(topic=topic, partition=partition)
                if not zk_conn.exists(node_path):
                    zk_conn.ensure_path(node_path)
                    # Encoded explicitly so the value is bytes on Python 3 too.
                    zk_conn.set(node_path, str(0).encode('ascii'))

        consumer = KafkaConsumer(
            bootstrap_servers=zk_instance['kafka_connect_str'],
            group_id="my_consumer",
            auto_offset_reset='earliest',
            enable_auto_commit=False)
        consumer.subscribe(TOPICS)

        while not SHUTDOWN.is_set():
            response = consumer.poll(timeout_ms=500, max_records=10)
            zk_trans = zk_conn.transaction()
            # ``items()`` works on both Python 2 and 3.
            for tp, records in response.items():
                if not records:
                    continue
                # Offset 0 is a valid offset, so test for "no records"
                # rather than for a falsy offset.
                offset = max(record.offset for record in records)
                # Use the same path template the znodes were created with,
                # so the topic is part of the path.
                zk_trans.set_data(
                    zk_path_partition_tmpl.format(topic=tp.topic, partition=tp.partition),
                    str(offset).encode('ascii'))
            zk_trans.commit()
    finally:
        zk_conn.stop()
class ZookClient(object):
    """Scratch client that writes a few fixed paths through kazoo.

    Written so that it is valid on both Python 2 and Python 3.
    """

    def __init__(self, api_client=None):
        """Configure logging, then create and start a default ``KazooClient``.

        ``api_client`` is accepted but not used.
        """
        logging.basicConfig()
        # Create a client and start it
        self.zk = KazooClient()
        self.zk.start()

    def create_accounts_path(self, name, **kwargs):
        """Ensure ``/dso/<name>`` exists and store a fixed account record in it."""
        path = "/dso/" + name
        self.zk.ensure_path(path)
        # Encoded explicitly: a bytes literal cannot hold non-ASCII text on
        # Python 3.
        account_data = (
            u"id: 7b4235ca-00fb-4dca-ad3e-8b6e3662631a\n"
            u"groupname: hr\n"
            u"description: 人力资源"
        ).encode("utf-8")
        self.zk.set(path, account_data)

    def create_accountinfo_path(self, account_id, **kwargs):
        """Ensure ``/app/someservice`` exists (``account_id`` is not used)."""
        self.zk.ensure_path("/app/someservice")

    def create_path(self, path, **kwargs):
        """Ensure ``path`` exists."""
        self.zk.ensure_path(path)

    def get_data(self, path):
        """Return whatever the client's ``get`` returns for ``path``."""
        return self.zk.get(path)

    def test_tran(self):
        """Recreate ``/app22`` directly, then again inside one transaction."""
        self.zk.delete("/app22")
        self.zk.create("/app22", b'{"12": "12"}')
        tran = self.zk.transaction()
        tran.delete("/app22")
        tran.create("/app22", b'{"22": "22"}')
        tran.commit()
        print("commit")

    def stop(self):
        # In the end, stop it
        self.zk.stop()
class ZookClient(object):
    """Scratch client that writes a few fixed paths through kazoo.

    Written so that it is valid on both Python 2 and Python 3.
    """

    def __init__(self, api_client=None):
        """Configure logging, then create and start a default ``KazooClient``.

        ``api_client`` is accepted but not used.
        """
        logging.basicConfig()
        # Create a client and start it
        self.zk = KazooClient()
        self.zk.start()

    def create_accounts_path(self, name, **kwargs):
        """Ensure ``/dso/<name>`` exists and store a fixed account record in it."""
        path = "/dso/" + name
        self.zk.ensure_path(path)
        # Encoded explicitly: a bytes literal cannot hold non-ASCII text on
        # Python 3.
        account_data = (
            u"id: 7b4235ca-00fb-4dca-ad3e-8b6e3662631a\n"
            u"groupname: hr\n"
            u"description: 人力资源"
        ).encode("utf-8")
        self.zk.set(path, account_data)

    def create_accountinfo_path(self, account_id, **kwargs):
        """Ensure ``/app/someservice`` exists (``account_id`` is not used)."""
        self.zk.ensure_path("/app/someservice")

    def create_path(self, path, **kwargs):
        """Ensure ``path`` exists."""
        self.zk.ensure_path(path)

    def get_data(self, path):
        """Return whatever the client's ``get`` returns for ``path``."""
        return self.zk.get(path)

    def test_tran(self):
        """Recreate ``/app22`` directly, then again inside one transaction."""
        self.zk.delete("/app22")
        self.zk.create("/app22", b'{"12": "12"}')
        tran = self.zk.transaction()
        tran.delete("/app22")
        tran.create("/app22", b'{"22": "22"}')
        tran.commit()
        print("commit")

    def stop(self):
        # In the end, stop it
        self.zk.stop()
''' znodes = async_obj.get() try: children = async_obj.get() # すべての子ノードの名称を出力 print('#####[print child znodes]#####') for child in children: print(child) except (ConnectionLossException, NoAuthException): print("ERROR!!!") sys.exit(1) if __name__ == '__main__': # トランザクションの開始 tx = zk.transaction() ## 基本的な使い方を確認 # パスの生成 zk.ensure_path(root) # znodeが未作成であれば作成 znode = root + '/sample_znode' if zk.exists(znode) is None: zk.create(znode, b'sample_data') print_status(znode) # データの更新 zk.set(znode, b'updated_data') print_status(znode) # 子ノードの追加 znode2 = root + '/sample_znode2' if zk.exists(znode2) is None: zk.create(znode2, b'sample_data2')
class ZkSyncManager(object):
    """ZooKeeper-backed locking helper.

    Offers two kinds of locks below ``lock_path_prefix``:

    * ``lock()`` -- a kazoo ``Lock`` recipe held for the duration of a
      ``with`` block;
    * "persistent" locks -- plain znodes created/deleted in bulk whose data
      identifies the holder.
    """

    RETRIES = 2
    LOCK_TIMEOUT = 3

    def __init__(self, host='127.0.0.1:2181', lock_path_prefix='/mastermind/locks/'):
        """Connect to ``host``; connection errors are logged and re-raised."""
        self.client = KazooClient(host, timeout=3)
        logger.info('Connecting to zookeeper host {0}, '
                    'lock_path_prefix: {1}'.format(host, lock_path_prefix))
        try:
            self.client.start()
        except Exception as e:
            logger.error(e)
            raise

        # Template retry policy; every operation works on its own copy.
        self._retry = KazooRetry(max_tries=self.RETRIES)

        self.lock_path_prefix = lock_path_prefix

    @contextmanager
    def lock(self, lockid, blocking=True, timeout=LOCK_TIMEOUT):
        """Hold the lock ``lockid`` for the duration of the ``with`` block.

        Raises ``LockFailedError`` when the lock cannot be acquired (either
        ``acquire`` returned a falsy value or it timed out). Any other error
        from ``acquire`` is logged and re-raised. Exceptions raised by the
        ``with`` body propagate untouched; the lock is released only if it
        was actually acquired.
        """
        lock = Lock(self.client, self.lock_path_prefix + lockid)

        # Only the acquisition is covered by the "failed to acquire" handling,
        # so errors from the caller's body are never misreported as lock
        # failures.
        try:
            acquired = lock.acquire(blocking=blocking, timeout=timeout)
        except LockTimeout:
            logger.info('Failed to acquire lock {0} due to timeout '
                        '({1} seconds)'.format(lockid, timeout))
            raise LockFailedError(lock_id=lockid)
        except Exception as e:
            logger.error('Failed to acquire lock {0}: {1}\n{2}'.format(
                lockid, e, traceback.format_exc()))
            raise

        logger.debug('Lock {0} acquired: {1}'.format(lockid, acquired))
        if not acquired:
            raise LockFailedError(lock_id=lockid)

        try:
            yield
        finally:
            lock.release()

    def persistent_locks_acquire(self, locks, data=''):
        """Create all ``locks`` atomically, storing ``data`` in each.

        Returns True on success. Raises ``LockError`` when retries are
        exhausted or kazoo reports an error; ``LockAlreadyAcquiredError``
        propagates from the inner call when a lock already exists.
        """
        try:
            retry = self._retry.copy()
            result = retry(self._inner_persistent_locks_acquire, locks=locks, data=data)
        except RetryFailedError:
            raise LockError
        except KazooException as e:
            logger.error('Failed to fetch persistent locks {0}: {1}\n{2}'.format(
                locks, e, traceback.format_exc()))
            raise LockError
        return result

    def _inner_persistent_locks_acquire(self, locks, data):
        """Single attempt of ``persistent_locks_acquire`` (run under retry)."""
        ensured_paths = set()
        tr = self.client.transaction()
        for lockid in locks:
            path = self.lock_path_prefix + lockid
            # Make sure the parent znode exists, once per distinct parent.
            parts = path.rsplit('/', 1)
            if len(parts) == 2 and parts[0] not in ensured_paths:
                self.client.ensure_path(parts[0])
                ensured_paths.add(parts[0])
            tr.create(path, data)

        failed = False
        failed_locks = []
        result = tr.commit()
        # Results are positional: result[i] belongs to locks[i].
        for i, res in enumerate(result):
            if isinstance(res, ZookeeperError):
                failed = True
            if isinstance(res, NodeExistsError):
                failed_locks.append(locks[i])

        if failed_locks:
            holders = []
            for f in failed_locks:
                # TODO: fetch all holders with 1 transaction request
                holders.append((f, self.client.get(self.lock_path_prefix + f)))
            # Prefer reporting a lock held by someone else over one that
            # already carries our own data.
            foreign_holders = [(l, h) for l, h in holders if h[0] != data]
            failed_lock, holder_resp = foreign_holders[0] if foreign_holders else holders[0]
            holder = holder_resp[0]
            holders_ids = list(set(h[0] for _, h in holders))
            logger.warning('Persistent lock {0} is already set by {1}'.format(failed_lock, holder))
            raise LockAlreadyAcquiredError(
                'Lock for {0} is already acquired by job {1}'.format(failed_lock, holder),
                lock_id=failed_lock, holder_id=holder,
                holders_ids=holders_ids)
        elif failed:
            logger.error('Failed to set persistent locks {0}, result: {1}'.format(
                locks, result))
            raise LockError

        return True

    def get_children_locks(self, lock_prefix):
        """Return the lock ids found directly below ``lock_prefix``.

        Each returned id is ``lock_prefix`` followed by the child name.
        Raises ``LockError`` when retries are exhausted.
        """
        try:
            retry = self._retry.copy()
            result = retry(self.__inner_get_children_locks, lock_prefix)
        except RetryFailedError:
            raise LockError
        return result

    def __inner_get_children_locks(self, lock_prefix):
        """Single attempt of ``get_children_locks`` (run under retry)."""
        full_path = self.lock_path_prefix + lock_prefix
        # normpath drops a trailing slash before the path is ensured.
        self.client.ensure_path(os.path.normpath(full_path))
        result = self.client.get_children(full_path)
        return ['{0}{1}'.format(lock_prefix, lock) for lock in result]

    def persistent_locks_release(self, locks, check=''):
        """Delete the given persistent locks.

        When ``check`` is non-empty, each lock's data must equal it, otherwise
        ``InconsistentLockError`` is raised. Locks that are already gone are
        skipped with a warning. Raises ``LockError`` when retries are
        exhausted or kazoo reports an error.
        """
        try:
            retry = self._retry.copy()
            result = retry(self.__inner_persistent_locks_release, locks=locks, check=check)
        except RetryFailedError:
            raise LockError
        except KazooException as e:
            logger.error('Failed to remove persistent locks {0}: {1}\n{2}'.format(
                locks, e, traceback.format_exc()))
            raise LockError
        return result

    def __inner_persistent_locks_release(self, locks, check):
        """Single attempt of ``persistent_locks_release`` (run under retry)."""
        for lockid in locks:
            try:
                if check:
                    data = self.client.get(self.lock_path_prefix + lockid)
                    if data[0] != check:
                        logger.error('Lock {0} has inconsistent data: {1}, '
                                     'expected {2}'.format(lockid, data[0], check))
                        raise InconsistentLockError(lock_id=lockid, holder_id=data[0])
                self.client.delete(self.lock_path_prefix + lockid)
            except NoNodeError:
                logger.warning('Persistent lock {0} is already removed'.format(lockid))
        return True
if state == KazooState.LOST: # Register somewhere that the session was lost print 'The session is lost: %s' % str(state) elif state == KazooState.SUSPENDED: # Handle being disconnected from Zookeeper print 'The session is suspended: %s' % str(state) else: # Handle being connected/reconnected to Zookeeper print 'The session is reconnected: %s' % str(state) zk = KazooClient(hosts='127.0.0.1:2181') zk.start() zk.add_listener(my_listener) # Ensure a path, create if necessary zk.ensure_path("/dragonflow/table1") # Determine if a node exists if zk.exists("/dragonflow/table1/key1"): # Do transaction transaction = zk.transaction() transaction.create('/dragonflow/table1/key9', b"value9") transaction.set_data('/dragonflow/table1/key1', b"value8") results = transaction.commit() print(results) result = zk.get('/dragonflow/table1/key8') print result[0]
class ZookeeperClusterManager(ClusterManager):
    """
    A cluster manager that manages one cluster's state and configurations
    with a Zookeeper ensemble via kazoo.

    Below is the structure of the znodes:
        /needlestack
            /<CLUSTER_NAME_1>
                /live_nodes
                    /<HOSTPORT_1>
                    /<HOSTPORT_2>
                    /<HOSTPORT_3>
                    /<HOSTPORT_4>
                    ...
                /collections
                    /<COLLECTION_NAME_1>
                        /shards
                            /<SHARD_NAME_1>
                                /replicas
                                    /<HOSTPORT_2>
                                    /<HOSTPORT_4>
                            /<SHARD_NAME_2>
                                /replicas
                                    /<HOSTPORT_1>
                                    /<HOSTPORT_3>
                    /<COLLECTION_NAME_2>
                        ...
    """

    cluster_name: str
    hostport: str
    zk: KazooClient
    cache: TreeCache

    def __init__(self, cluster_name: str, hostport: str, hosts: List[str], zookeeper_root: str):
        """Build the client and tree cache; nothing connects until startup()."""
        self.cluster_name = cluster_name
        self.hostport = hostport
        self.zookeeper_root = zookeeper_root
        self.zk = KazooClient(hosts=hosts)
        self.zk.add_listener(self.zk_listener)
        self.cache = TreeCache(self.zk, self.base_znode)

    @property
    def base_znode(self):
        """Root znode of this cluster."""
        return f"{self.zookeeper_root}/{self.cluster_name}"

    @property
    def live_nodes_znode(self):
        """Parent znode of the live-node entries."""
        return f"{self.base_znode}/live_nodes"

    @property
    def this_node_znode(self):
        """Live-node znode of this process (keyed by ``self.hostport``)."""
        return f"{self.base_znode}/live_nodes/{self.hostport}"

    @property
    def collections_znode(self):
        """Parent znode of all collections."""
        return f"{self.base_znode}/collections"

    def collection_znode(self, collection_name: str) -> str:
        """Znode of one collection."""
        return f"{self.collections_znode}/{collection_name}"

    def shard_znode(self, collection_name: str, shard_name: Optional[str] = None) -> str:
        """Znode of one shard, or of the ``shards`` parent when no name is given."""
        znode = f"{self.collections_znode}/{collection_name}/shards"
        if shard_name:
            znode += "/" + shard_name
        return znode

    def replica_znode(self, collection_name: str, shard_name: str, hostport: Optional[str] = None) -> str:
        """Znode of one replica, or of the ``replicas`` parent when no hostport is given."""
        shard_znode = self.shard_znode(collection_name, shard_name)
        znode = f"{shard_znode}/replicas"
        if hostport:
            znode += "/" + hostport
        return znode

    def startup(self):
        """Connect, start the cache, install signal handlers and ensure the base paths."""
        self.zk.start()
        self.cache.start()
        signal.signal(signal.SIGINT, self.signal_listener)
        signal.signal(signal.SIGTERM, self.signal_listener)
        self.zk.ensure_path(self.live_nodes_znode)
        self.zk.ensure_path(self.collections_znode)

    def shutdown(self):
        """Close the cache, then stop and close the client."""
        self.cache.close()
        self.zk.stop()
        self.zk.close()

    def cleanup(self):
        """Delete the replica znodes of every collection hosted on this node."""
        logger.info("Removing ZNodes via cleanup")
        transaction = self.zk.transaction()

        for collection in self.list_local_collections():
            for shard in collection.shards:
                for replica in shard.replicas:
                    # Same accessor as set_state()/add_collections(): the
                    # hostport lives on the replica's node.
                    znode = self.replica_znode(collection.name, shard.name,
                                               replica.node.hostport)
                    transaction.delete(znode)

        self.commit_transaction(transaction)

    def register_merger(self):
        pass

    def register_searcher(self):
        """Create this node's ephemeral live-node znode, retrying with backoff.

        Failures after the retries are logged, not raised.
        """
        try:
            retrier = KazooRetry(max_tries=5, delay=1, backoff=2, max_delay=20)
            retrier(self.zk.create, self.this_node_znode, ephemeral=True, makepath=True)
            logger.info(f"Created ephemeral ZNode {self.this_node_znode}")
        except kazoo.retry.RetryFailedError:
            logger.error(
                f"Max retries reached for creating ephemeral ZNode {self.this_node_znode}"
            )
        except kazoo.retry.InterruptedError:
            logger.error(
                f"Retries interrupted for creating ephemeral ZNode {self.this_node_znode}"
            )

    def set_state(self, state, collection_name=None, shard_name=None, hostport=None):
        """Set ``state`` on the matching replicas in one transaction.

        Each filter is optional: ``collection_name`` limits the collections,
        ``shard_name`` the shards and ``hostport`` the replicas. Returns the
        result of ``commit_transaction``.
        """
        transaction = self.zk.transaction()

        collections = [collection_name] if collection_name else None
        for collection in self._list_collections(collections, hostport=hostport,
                                                 load_replica=True):
            logger.info(
                f"Set {collection.name}/shards ZNodes to {collections_pb2.Replica.State.Name(state)}"
            )
            for shard in collection.shards:
                # Honour the shard filter when one was given.
                if shard_name and shard.name != shard_name:
                    continue
                for replica in shard.replicas:
                    znode = self.replica_znode(collection.name, shard.name,
                                               replica.node.hostport)
                    replica.state = state
                    transaction.set_data(znode, replica.SerializeToString())

        return self.commit_transaction(transaction)

    def set_local_state(self, state, collection_name=None, shard_name=None):
        """``set_state`` restricted to replicas hosted on this node."""
        return self.set_state(state, collection_name, shard_name, self.hostport)

    def signal_listener(self, signum, frame):
        """Signal handler: shut down."""
        self.shutdown()

    def zk_listener(self, state):
        """Log kazoo connection state changes."""
        if state == KazooState.LOST:
            logger.warning("Connection to Zookeeper lost")
        elif state == KazooState.SUSPENDED:
            logger.warning("Connection to Zookeeper disconnected")
        else:
            logger.info("Connection to Zookeeper established")

    def add_collections(self, collections):
        """Configure a list of collections into Zookeeper

        All znodes (collection, shards, replicas) are created in a single
        transaction; new replicas start in the BOOTING state. Returns
        ``collections`` on success and an empty list otherwise.
        """
        transaction = self.zk.transaction()

        for collection in collections:
            # Children are stored in their own znodes, so strip them from the
            # parent's serialized form.
            collection_copy = deepcopy(collection)
            collection_copy.ClearField("shards")
            collection_znode = self.collection_znode(collection.name)
            transaction.create(collection_znode, collection_copy.SerializeToString())
            transaction.create(self.shard_znode(collection.name))
            for shard in collection.shards:
                shard_copy = deepcopy(shard)
                shard_copy.ClearField("replicas")
                shard_znode = self.shard_znode(collection.name, shard.name)
                transaction.create(shard_znode, shard_copy.SerializeToString())
                transaction.create(self.replica_znode(collection.name, shard.name))
                for replica in shard.replicas:
                    replica_copy = deepcopy(replica)
                    replica_copy.state = collections_pb2.Replica.BOOTING
                    replica_znode = self.replica_znode(collection.name, shard.name,
                                                       replica.node.hostport)
                    transaction.create(replica_znode, replica_copy.SerializeToString())

        if self.commit_transaction(transaction):
            return collections
        else:
            return []

    def delete_collections(self, collection_names):
        """Delete the given collections, children first, in one transaction.

        Returns ``collection_names`` on success and an empty list otherwise.
        """
        transaction = self.zk.transaction()

        for collection_name in collection_names:
            shards_znode = self.shard_znode(collection_name)
            for shard_name in self.zk.get_children(shards_znode):
                replicas_znode = self.replica_znode(collection_name, shard_name)
                for replica_name in self.zk.get_children(replicas_znode):
                    replica_znode = self.replica_znode(collection_name, shard_name,
                                                       replica_name)
                    transaction.delete(replica_znode)
                transaction.delete(replicas_znode)
                transaction.delete(self.shard_znode(collection_name, shard_name))
            transaction.delete(shards_znode)
            transaction.delete(self.collection_znode(collection_name))

        if self.commit_transaction(transaction):
            return collection_names
        else:
            return []

    def list_nodes(self):
        """Return a ``Node`` proto for every child of the live-nodes znode."""
        live_nodes = self.zk.get_children(self.live_nodes_znode)
        nodes = [collections_pb2.Node(hostport=node) for node in live_nodes]
        return nodes

    def list_collections(self, collection_names=None, include_state=True):
        """List collections, optionally restricted to ``collection_names``."""
        return self._list_collections(collection_names, load_replica=include_state)

    def list_local_collections(self, include_state=True):
        """List collections that have a replica on this node."""
        return self._list_collections(hostport=self.hostport, load_replica=include_state)

    def _list_collections(
        self,
        collection_names: Optional[List[str]] = None,
        hostport: Optional[str] = None,
        load_replica: Optional[bool] = True,
    ) -> List[collections_pb2.Collection]:
        """Read collections with their shards and replicas from Zookeeper.

        ``hostport`` keeps only replicas on that host; shards without a
        matching replica and collections without a matching shard are left
        out. With ``load_replica`` false, replicas are returned as empty
        protos instead of being read from their znodes.
        """
        collections = []

        collection_names = collection_names or self.zk.get_children(self.collections_znode)
        for collection_name in collection_names:

            shards = []
            shards_znode = self.shard_znode(collection_name)
            for shard_name in self.zk.get_children(shards_znode):

                replicas = []
                replicas_znode = self.replica_znode(collection_name, shard_name)
                for replica_hostport in self.zk.get_children(replicas_znode):
                    if hostport == replica_hostport or hostport is None:
                        replica_znode = self.replica_znode(
                            collection_name, shard_name, replica_hostport)
                        if load_replica:
                            replica_data, _ = self.zk.get(replica_znode)
                            replica_proto = collections_pb2.Replica.FromString(replica_data)
                        else:
                            replica_proto = collections_pb2.Replica()
                        replicas.append(replica_proto)

                if replicas:
                    shard_znode = self.shard_znode(collection_name, shard_name)
                    shard_data, _ = self.zk.get(shard_znode)
                    shard_proto = collections_pb2.Shard.FromString(shard_data)
                    shard_proto.replicas.extend(replicas)
                    shards.append(shard_proto)

            if shards:
                collection_znode = self.collection_znode(collection_name)
                collection_data, _ = self.zk.get(collection_znode)
                collection_proto = collections_pb2.Collection.FromString(collection_data)
                collection_proto.shards.extend(shards)
                collections.append(collection_proto)

        return collections

    def get_searchers(self, collection_name, shard_names=None):
        """Return ``(shard_name, active_hostports)`` pairs from the cache.

        Defaults to every cached shard of the collection. Shards without an
        active replica are logged as errors and omitted.
        """
        if not shard_names:
            shards_znode = self.shard_znode(collection_name)
            shard_names = self.cache.get_children(shards_znode, [])

        shard_hostports = []
        for shard_name in shard_names:
            hostports = self._get_searchers_for_shard(collection_name, shard_name, active=True)
            if hostports:
                shard_hostports.append((shard_name, hostports))
            else:
                logger.error(
                    f"No active Searcher node for {collection_name}/{shard_name}."
                )

        return shard_hostports

    def _get_searchers_for_shard(self, collection_name: str, shard_name: str,
                                 active: bool = True) -> List[str]:
        """Return the cached replica hostports of a shard.

        With ``active`` true, only replicas whose cached state is ACTIVE are
        returned.
        """
        replicas_znode = self.replica_znode(collection_name, shard_name)
        hostports = self.cache.get_children(replicas_znode, [])

        if active:
            active_hostports = []
            for hostport in hostports:
                replica_znode = self.replica_znode(collection_name, shard_name, hostport)
                node = self.cache.get_data(replica_znode)
                if node:
                    replica = collections_pb2.Replica.FromString(node.data)
                    if replica.state == collections_pb2.Replica.ACTIVE:
                        active_hostports.append(hostport)
            hostports = active_hostports

        return hostports

    def commit_transaction(self, transaction: kazoo.client.TransactionRequest) -> bool:
        """Commit a transaction and log the first exception after rollbacks"""
        for result, operation in zip(transaction.commit(), transaction.operations):
            # Rolled-back operations are a consequence of the failure, not
            # its cause; skip them to reach the operation that failed.
            if isinstance(result, kazoo.exceptions.RolledBackError):
                continue
            elif isinstance(result, Exception):
                logger.error(
                    f"{result.__class__.__name__} in Kazoo transaction: {operation}"
                )
                return False
        return True
class ZookClient(object):
    """Mirror DSO account, service and mapping data into a ZooKeeper tree.

    Everything is stored below ``/dso/``:

    * ``accounts/<account id>/groups/...`` and ``accounts/<account id>/services/...``
      hold the per-account hierarchy written by the ``create_*_path`` methods.
    * ``Mapping/Ip2User``, ``Mapping/Mac2User``, ``Mapping/Ip2Vm``,
      ``Mapping/Aid2Aname``, ``Mapping/User2Account`` and ``Mapping/VmInfo``
      hold lookup tables.

    Node payloads are UTF-8 encoded JSON documents.  The code is written so
    that it runs unchanged on both Python 2 and Python 3.
    """

    # Path fragments.  Every fragment ends with "/" so that paths can be
    # assembled by plain string concatenation.
    CONST_BASE_PATH = "/dso/"
    CONST_ACCOUNTS_PATH = "accounts/"
    CONST_GROUPS_PATH = "groups/"
    CONST_USERS_PATH = "users/"
    CONST_HOSTS_PATH = "hosts/"
    CONST_SERVERS_PATH = "servers/"
    CONST_SERVICES_PATH = "services/"
    CONST_INSTANCES_PATH = "instances/"
    CONST_MAPPING_PATH = "Mapping/"
    CONST_IP2USER_PATH = "Ip2User/"
    CONST_MAC2USER_PATH = "Mac2User/"
    CONST_IP2VMPATH = "Ip2Vm/"
    CONST_USER2ACCOUNT_PATH = "User2Account/"
    CONST_AID2ANAME_PATH = "Aid2Aname/"
    CONST_VM_INFO_PATH = "VmInfo/"
    CONST_DSO_PATH = "dso/"

    def __init__(self, zookeeper_host=None):
        """Connect to ZooKeeper and create the fixed part of the tree.

        :param zookeeper_host: connection string (``host:port``); defaults to
            ``localhost:2181`` when ``None``.
        """
        logging.basicConfig()
        if zookeeper_host is None:
            zookeeper_host = "localhost:2181"
        self.zk = KazooClient(zookeeper_host)
        self.zk.start()
        # Transaction committed by commit(); the mapping writers below build
        # their own short-lived transactions instead of using this one.
        self.tran = self.zk.transaction()
        # Value stored under the "user" key of every instance of a service,
        # keyed by service name.  Kept per instance (not on the class) so a
        # caller mutating it cannot affect other clients.
        self.CONST_HOST_MAPPING = dict(vrouter='fedora',
                                       dns='ubuntu',
                                       firewall='ubuntu',
                                       ipsecvpn='centos',
                                       vpc='ubuntu')

        mapping_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        ip2vm_path = mapping_path + self.CONST_IP2VMPATH

        self.zk.ensure_path(self.CONST_BASE_PATH)
        self.zk.ensure_path(self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH)
        self.zk.ensure_path(mapping_path)
        self.zk.ensure_path(mapping_path + self.CONST_IP2USER_PATH)
        self.zk.ensure_path(ip2vm_path)
        self.zk.ensure_path(mapping_path + self.CONST_MAC2USER_PATH)

        # Built-in "dso" pseudo account inside the Ip2Vm mapping.
        dso_path = ip2vm_path + self.CONST_DSO_PATH
        self.zk.ensure_path(dso_path)
        self.zk.set(dso_path, b'{"accountname": "dso"}')
        # The server node lives *below* dso_path.  (This used to be a quoted
        # string literal, which wrote the data to a bogus path.)
        dso_ip_path = dso_path + "172.19.0.101"
        self.zk.ensure_path(dso_ip_path)
        self.zk.set(dso_ip_path,
                    b'{"hostname": "dso-server", "manageip": "172.19.0.101", "servicename": "dso"}')

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _write_json(self, path, data):
        """Ensure *path* exists and store *data* on it as UTF-8 JSON."""
        self.zk.ensure_path(path)
        self.zk.set(path, json.dumps(data).encode('utf8'))

    def _replace_account_mapping(self, base_path, account_info, mapping,
                                 none_message):
        """Atomically replace ``<base_path><account id>/`` with *mapping*.

        Existing direct children and the account node itself are deleted and
        re-created inside one transaction.  Keys that are ``None`` are
        skipped after printing *none_message*.
        """
        account_path = base_path + account_info.id + "/"
        tran = self.zk.transaction()
        if self.zk.exists(account_path):
            for child in self.zk.get_children(account_path):
                tran.delete(account_path + child)
            tran.delete(account_path)
        account_data = {"account_name": account_info.account_name}
        tran.create(account_path, json.dumps(account_data).encode('utf8'))
        for key in mapping:
            if key is None:
                print(none_message)
                continue
            tran.create(account_path + key,
                        json.dumps(mapping[key]).encode('utf8'))
        tran.commit()

    # ------------------------------------------------------------------
    # account hierarchy
    # ------------------------------------------------------------------
    def create_account_path(self, account_info):
        """Create ``accounts/<id>`` and store the account id and name on it.

        :param account_info: object exposing ``id`` and ``account_name``.
        """
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        self.zk.ensure_path(accounts_path)
        account_data = dict(id=account_info.id, name=account_info.account_name)
        self._write_json(accounts_path + account_info.id, account_data)

    def create_accountinfo_path(self, account_info):
        """Write the groups and services sub-trees of an account.

        :param account_info: object exposing ``id``, ``groups`` and
            ``services``.
        """
        account_path = (self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
                        + account_info.id + "/")
        if not self.zk.exists(account_path):
            # Only reported; the ensure_path calls below create the node.
            print("Path" + account_path + " does not exist.")

        groups_path = account_path + self.CONST_GROUPS_PATH
        self.zk.ensure_path(groups_path)
        for group_item in account_info.groups:
            self.create_usergroup_path(groups_path, group_item)

        services_path = account_path + self.CONST_SERVICES_PATH
        self.zk.ensure_path(services_path)
        self.gen_services_summay(services_path, account_info.services)
        for service_item in account_info.services:
            self.create_service_path(services_path, service_item)

    def create_service_path(self, parent_path, service):
        """Write one service node and its ``instances/`` children.

        Does nothing when *service* or its ``servicename`` is ``None``.
        """
        if service is None or service.servicename is None:
            return
        service_path = parent_path + service.servicename + "/"
        self._write_json(service_path,
                         dict(id=service.id, servicename=service.servicename))

        instances_path = service_path + self.CONST_INSTANCES_PATH
        self.zk.ensure_path(instances_path)
        for instance_item in service.instances:
            self.create_instance_path(instances_path, instance_item,
                                      service.servicename)

    def create_instance_path(self, parent_path, instance, service_name):
        """Write one instance node.

        :returns: the instance path, or ``None`` when ``instance.id`` is
            ``None``.
        :raises KeyError: if *service_name* is not in ``CONST_HOST_MAPPING``.
        """
        if instance.id is None:
            print("instance.id is None, maybe is vpc, and vpc is fail")
            return
        instance_path = parent_path + instance.id
        instance_data = dict(id=instance.id,
                             mac=instance.mac,
                             manageip=instance.manageip,
                             publicip=instance.publicip,
                             publicgateway=instance.publicgateway,
                             publicnetmask=instance.publicnetmask,
                             serviceip=instance.serviceip,
                             status=instance.status,
                             user=self.CONST_HOST_MAPPING[service_name])
        self._write_json(instance_path, instance_data)
        return instance_path

    def create_usergroup_path(self, parent_path, group):
        """Write one group node with its ``users/`` and ``servers/`` children.

        Does nothing when *group* or its ``groupname`` is ``None``.
        """
        if group is None or group.groupname is None:
            return
        group_path = parent_path + group.groupname + "/"
        group_data = dict(id=group.id,
                          groupname=group.groupname,
                          description=group.description)
        self._write_json(group_path, group_data)

        users_path = group_path + self.CONST_USERS_PATH
        self.zk.ensure_path(users_path)
        for user_item in group.users:
            self.create_user_path(users_path, user_item)

        servers_path = group_path + self.CONST_SERVERS_PATH
        self.zk.ensure_path(servers_path)
        for server in group.servers:
            self.create_host_path(servers_path, server)

    def create_user_path(self, parent_path, user):
        """Write one user node and its ``hosts/`` children.

        Does nothing (besides printing) when *user* or its ``name`` is
        ``None``.
        """
        if user is None or user.name is None:
            print("user is None")
            return
        user_path = parent_path + user.name + "/"
        self._write_json(user_path,
                         dict(id=user.id, name=user.name, email=user.email))

        hosts_path = user_path + self.CONST_HOSTS_PATH
        self.zk.ensure_path(hosts_path)
        for host_item in user.hosts:
            self.create_host_path(hosts_path, host_item)

    def create_host_path(self, parent_path, host):
        """Write one host node named after its MAC address.

        :returns: the host path, or ``None`` when *host* or its ``mac`` is
            ``None``.
        """
        if host is None or host.mac is None:
            return
        host_path = parent_path + host.mac
        host_data = dict(ip=host.ip,
                         mac=host.mac,
                         host_name=host.host_name,
                         domain=host.domain)
        self._write_json(host_path, host_data)
        return host_path

    # ------------------------------------------------------------------
    # deletion
    # ------------------------------------------------------------------
    def delete_dso_path(self):
        """Recursively delete ``/dso`` and re-create it empty."""
        self.zk.delete("/dso", recursive=True)
        self.zk.ensure_path("/dso")

    def delete_account_path(self, account_id):
        """Recursively delete ``accounts/<account_id>``."""
        account_path = (self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
                        + account_id)
        self.zk.delete(account_path, recursive=True)

    def delete_account_mapping_path(self, account_id):
        """Recursively delete the account's Ip2Vm, Ip2User and Mac2User nodes."""
        mapping_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        for table in (self.CONST_IP2VMPATH,
                      self.CONST_IP2USER_PATH,
                      self.CONST_MAC2USER_PATH):
            self.zk.delete(mapping_path + table + account_id, recursive=True)

    @staticmethod
    def check_none(data):
        """Return the string ``"None"`` for ``None``, otherwise *data* itself."""
        return "None" if data is None else data

    # ------------------------------------------------------------------
    # mapping tables
    # ------------------------------------------------------------------
    def create_user2account_path(self, user2account):
        """Store the whole *user2account* dict on ``Mapping/User2Account``."""
        path = (self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
                + self.CONST_USER2ACCOUNT_PATH)
        self._write_json(path, user2account)

    def create_aid2aname_path(self, aid2aname):
        """Store the whole *aid2aname* dict on ``Mapping/Aid2Aname``."""
        mapping_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        self.zk.ensure_path(mapping_path)
        self._write_json(mapping_path + self.CONST_AID2ANAME_PATH, aid2aname)

    def create_ip2user_path(self, account_info, ip2user):
        """Replace ``Mapping/Ip2User/<account id>/`` with one node per IP."""
        base_path = (self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
                     + self.CONST_IP2USER_PATH)
        self._replace_account_mapping(base_path, account_info, ip2user,
                                      "Ip is None")

    def create_mac2user_path(self, account_info, mac2user):
        """Replace ``Mapping/Mac2User/<account id>/`` with one node per MAC."""
        base_path = (self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
                     + self.CONST_MAC2USER_PATH)
        self._replace_account_mapping(base_path, account_info, mac2user,
                                      "mac is None")

    def create_ip2vm_path(self, account_info):
        """Synchronise ``Mapping/Ip2Vm/<account id>/`` with the account's VMs.

        In a single transaction: nodes for IPs that are no longer present are
        deleted, new IPs are created and nodes whose ``id`` or ``hostname``
        changed are rewritten.
        """
        instance_dict = self.get_instances(account_info)
        zoo_instances = {}
        account_path = (self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
                        + self.CONST_IP2VMPATH + account_info.id + "/")
        account_payload = json.dumps(
            dict(accountname=account_info.account_name)).encode('utf8')
        tran = self.zk.transaction()

        if self.zk.exists(account_path):
            # ZooKeeper returns bytes, so compare against the encoded payload.
            if self.zk.get(account_path)[0] != account_payload:
                tran.set_data(account_path, account_payload)
            for child in self.zk.get_children(account_path):
                if instance_dict.get(child) is None:
                    tran.delete(account_path + child)
                else:
                    raw = self.zk.get(account_path + child)[0]
                    zoo_instances[child] = json.loads(raw.decode('utf8'))
        else:
            tran.create(account_path, account_payload)

        for ip, vm in instance_dict.items():
            stored = zoo_instances.get(ip)
            vm_payload = json.dumps(vm).encode('utf8')
            if stored is None:
                tran.create(account_path + ip, vm_payload)
            elif not self.same_instance(vm, stored):
                tran.set_data(account_path + ip, vm_payload)
        tran.commit()

    def sync_mapping_pre_account(self, account_info, vpn_clients):
        """Rebuild the Ip2User, Mac2User and Ip2Vm tables of one account.

        :param account_info: account whose groups/users/hosts are mapped.
        :param vpn_clients: iterable of objects exposing ``user_name``,
            ``group`` and ``ip``; each one with an IP is added to Ip2User
            (a ``/suffix`` on the IP is stripped).
        """
        # user2account is collected but currently not written anywhere.
        user2account = {}
        ip2user = {}
        mac2user = {}
        for group in account_info.groups:
            for user in group.users:
                user2account[user.id] = account_info.id
                for host in user.hosts:
                    user_data = dict(userid=user.id,
                                     username=user.name,
                                     useremail=user.email,
                                     groupname=group.groupname)
                    if host.ip is not None:
                        ip2user[host.ip] = user_data
                    mac2user[host.mac] = user_data

        for vpn in vpn_clients:
            if vpn.ip is None:
                continue
            vpn_user_data = dict(userid='vpn_user',
                                 username=vpn.user_name,
                                 useremail='',
                                 groupname=vpn.group)
            ip2user[vpn.ip.split("/")[0]] = vpn_user_data

        self.create_ip2user_path(account_info, ip2user)
        self.create_mac2user_path(account_info, mac2user)
        self.create_ip2vm_path(account_info)

    def gen_vm_path(self, vm_info):
        """Write one ``Mapping/VmInfo/<manage ip>`` node per VM.

        ``/`` in the manage IP is replaced by ``-`` to form the node name.
        """
        mapping_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        self.zk.ensure_path(mapping_path)
        vms_path = mapping_path + self.CONST_VM_INFO_PATH
        self.zk.ensure_path(vms_path)
        for vm in vm_info:
            vm_node_path = vms_path + vm.manage_ip.replace('/', '-')
            self._write_json(vm_node_path,
                             dict(id=vm.id, type=vm.type, status=vm.status))

    def create_mapping_accounts(self, accounts):
        """Store the ``account id -> account name`` table for *accounts*."""
        aid2aname = dict(
            (account.id, account.account_name) for account in accounts)
        self.create_aid2aname_path(aid2aname)

    def gen_services_summay(self, path, services):
        """Store an HTML summary of every service on the existing node *path*.

        The payload is a JSON dict ``servicename -> html`` where *html* is
        one ``<table class="table">`` per instance, each followed by a
        newline.

        :raises KeyError: if a service that has instances is not listed in
            ``CONST_HOST_MAPPING``.
        """
        services_data = {}
        for service_item in services:
            tables = []
            for instance in service_item.instances:
                rows = (
                    ("manageip", instance.manageip),
                    ("publicip", instance.publicip),
                    ("publicgateway", instance.publicgateway),
                    ("publicnetmask", instance.publicnetmask),
                    ("serviceip", instance.serviceip),
                    ("status", instance.status),
                    ("user", self.CONST_HOST_MAPPING[service_item.servicename]),
                )
                cells = "".join(
                    "<tr><td>" + label + "</td><td>"
                    + self.check_none(value) + "</td></tr>"
                    for label, value in rows)
                tables.append("<table class=\"table\">" + cells + "</table>\n")
            services_data[service_item.servicename] = "".join(tables)
        self.zk.set(path, json.dumps(services_data).encode('utf8'))

    # ------------------------------------------------------------------
    # queries
    # ------------------------------------------------------------------
    def get_all_account(self):
        """Return the names of the children of the ``accounts/`` node."""
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        return self.zk.get_children(accounts_path)

    def get_instances(self, account_info):
        """Return the account's instances keyed by bare management IP.

        Instances without a ``manageip`` are skipped.  A ``/suffix`` is
        stripped from the key only; the stored ``manageip`` value keeps it.
        """
        instances_dict = {}
        for service_item in account_info.services:
            service_name = service_item.servicename
            for instance in service_item.instances:
                if instance.manageip is None:
                    continue
                bare_ip = instance.manageip.split('/')[0]
                instances_dict[bare_ip] = dict(
                    id=instance.id,
                    mac=instance.mac,
                    manageip=instance.manageip,
                    publicip=instance.publicip,
                    publicgateway=instance.publicgateway,
                    publicnetmask=instance.publicnetmask,
                    serviceip=instance.serviceip,
                    status=instance.status,
                    servicename=service_name,
                    hostname=account_info.id + '-' + service_name)
        return instances_dict

    def commit(self):
        """Commit the transaction created in ``__init__``."""
        self.tran.commit()

    def stopZooK(self):
        """Stop the ZooKeeper client."""
        self.zk.stop()

    def same_instance(self, vm0, vm1):
        """Return ``True`` when both dicts agree on ``id`` and ``hostname``."""
        return (vm0.get('id') == vm1.get('id')
                and vm0.get('hostname') == vm1.get('hostname'))
class ZkOperation(object):
    """CRUD helper for job, target, task, signal and callback znodes.

    Node layout below ``zk_root`` as used by the methods of this class::

        jobs/<job_id>/targets/<target>/tasks/<task_id>
        signal/<job_id>
        callback/<job_id | job_id_target | task_id>

    Node values are JSON documents encoded as bytes; ``send_signal`` is the
    exception and stores raw UUID bytes.
    """

    def __init__(self, zk_hosts, zk_root):
        """Create (but do not start) the client.

        :param zk_hosts: ZooKeeper connection string.
        :param zk_root: root path all nodes of this helper live under.
        """
        self.zk = KazooClient(zk_hosts)
        self.root = zk_root
        self.tasks = set()
        self.event = threading.Event()

    def start(self):
        """Start the client, authenticate and make sure the root exists."""
        # The former ``if self.zk.exists:`` guard tested a bound method, which
        # is always truthy, so the client has always been started here.
        self.zk.start()
        self.zk.add_auth('digest', 'publish:publish')
        if self.zk.connected:
            self.zk.ensure_path(self.root)

    def is_job_exist(self, job_id):
        """Return the result of ``exists`` for the job node (falsy if absent).

        :raises Exception: if *job_id* is the empty string.
        """
        if job_id == '':
            raise Exception('job_id is ""')
        node = self.root + '/jobs/' + job_id
        return self.zk.exists(node)

    def check_task_status(self, path):
        """Return the ``Status`` field of the JSON stored at ``root + path``.

        :raises Exception: if *path* is the empty string.
        """
        if path == '':
            raise Exception('path is ""')
        node = self.root + path
        data, _ = self.zk.get(node)
        # ZooKeeper hands back bytes; they must be decoded and parsed before
        # a field can be looked up (indexing the bytes raised TypeError).
        return json.loads(data.decode())['Status']

    # ------------------------------------------------------------------
    # low-level node helpers
    # ------------------------------------------------------------------
    def _is_exist(self, node):
        """Return ``True`` only when connected and *node* exists."""
        if self.zk.connected and self.zk.exists(node):
            return True
        logger.error('zk not connected or node does not exist')
        return False

    def _create_node(self, node, value=None):
        """Create *node* (with parents) holding *value* as JSON.

        :returns: ``True`` when created, ``False`` when disconnected or the
            node already exists.
        """
        if value is None:
            value = ''
        payload = json.dumps(value)
        if self.zk.connected and not self.zk.exists(node):
            self.zk.create(node, makepath=True, value=payload.encode())
            return True
        logger.error('zk not connected or node is exists')
        return False

    def _update_node(self, node, value):
        """Overwrite an existing *node* with *value* as JSON.

        :returns: ``True`` when the update was submitted, ``False`` when
            disconnected or the node is missing.  The transaction result is
            not inspected.
        """
        if self.zk.connected and self.zk.exists(node):
            tx = self.zk.transaction()
            tx.set_data(node, json.dumps(value).encode())
            tx.commit()
            return True
        logger.error(
            'update node failed: zk not connected or node is not exists')
        return False

    def _get_node(self, node):
        """Return the decoded JSON of *node*.

        :returns: the parsed value, ``{}`` when the node is empty, or
            ``False`` when disconnected or the node is missing.
        """
        if self.zk.connected and self.zk.exists(node):
            node_value, _ = self.zk.get(node)
            if node_value:
                return json.loads(node_value.decode())
            return {}
        logger.error('zk not connected or node does not exist')
        return False

    def _delete_node(self, node):
        """Recursively delete *node*; a missing node counts as success.

        :returns: ``False`` only when the client is not connected.
        """
        if not self.zk.connected:
            logger.error('zk not connected')
            return False
        if self.zk.exists(node):
            self.zk.delete(node, recursive=True)
        return True

    def _update_callback(self, node, status, messages=None):
        """Set ``status`` (and optionally ``messages``) on a callback node.

        Shared implementation of the ``update_callback_by_*`` methods.

        :returns: ``False`` when the node is missing or does not hold a
            ``callback_info`` document, otherwise the result of the update.
        """
        if not self.zk.exists(node):
            return False
        node_value = self._get_node(node)
        # _get_node yields False / {} for unreadable or empty nodes; indexing
        # those would raise instead of reporting failure.
        if not node_value or "callback_info" not in node_value:
            logger.error(
                "callback node has no callback_info: callback={}".format(node))
            return False
        node_value["callback_info"]["status"] = status
        if messages is not None:
            node_value["callback_info"]["messages"] = messages
        return self._update_node(node, node_value)

    # is exist
    def is_exist_signal(self, job_id):
        """Return ``True`` when the signal node of *job_id* exists."""
        node = '/{}/signal/{}'.format(self.root, job_id)
        return self._is_exist(node)

    # CREATE
    def create_new_job(self, job_id, job_value=None):
        """Create the job node; returns ``False`` for an empty *job_id*."""
        if job_value is None:
            job_value = ''
        if job_id == '':
            logger.error('job_id is null')
            return False
        node = self.root + '/jobs/' + job_id
        return self._create_node(node, job_value)

    def create_new_target(self, job_id, target, target_value):
        """Create the node of *target* below *job_id*."""
        node = '/{}/jobs/{}/targets/{}'.format(self.root, job_id, target)
        return self._create_node(node, target_value)

    def create_new_task(self, job_id, target, task):
        """Create the node of *task* (a dict with ``task_id``) below *target*."""
        node = '/{}/jobs/{}/targets/{}/tasks/{}'.format(
            self.root, job_id, target, task['task_id'])
        return self._create_node(node, task)

    def create_job_signal(self, job_id):
        """Create the signal node of *job_id* holding a fresh UUID hex string."""
        node = '/{}/signal/{}'.format(self.root, job_id)
        return self._create_node(node, uuid.uuid4().hex)

    # GET
    def get_job_info(self, job_id):
        """Return the decoded JSON stored on the job node."""
        job_node = '{}/jobs/{}'.format(self.root, job_id)
        job_value, _ = self.zk.get(job_node)
        return json.loads(job_value.decode())

    def get_target_info(self, job_id, target):
        """Return the decoded JSON stored on the target node."""
        target_node = '{}/jobs/{}/targets/{}'.format(self.root, job_id, target)
        target_value, _ = self.zk.get(target_node)
        return json.loads(target_value.decode())

    def get_task_info(self, job_id, target, task_id):
        """Return the decoded JSON stored on the task node."""
        task_node = '{}/jobs/{}/targets/{}/tasks/{}'.format(
            self.root, job_id, target, task_id)
        task_value, _ = self.zk.get(task_node)
        return json.loads(task_value.decode())

    # UPDATE
    def update_job_status(self, job_id, task):
        """Write *task* (a string) onto the signal node of *job_id*.

        Nothing is written when disconnected or the job does not exist.

        :raises Exception: if *job_id* is empty or *task* is ``None``.
        """
        if job_id == '':
            raise Exception('job_id is ""')
        if task is None:
            raise Exception('task is None')
        node = self.root + '/signal/' + job_id
        if self.zk.connected and self.is_job_exist(job_id):
            tx = self.zk.transaction()
            tx.set_data(node, task.encode())
            tx.commit()

    def handler_task(self, job_id, task_id, task_name, task_message, status):
        """Set ``status`` on the task *task_id* of *job_id*.

        ``task_name`` and ``task_message`` are accepted but not used.

        :returns: ``True`` when the task was found and updated, else ``False``.
        """
        # So that callers need not pass the target back, walk every target of
        # the job and look for the task below it.
        if not self.is_job_exist(job_id):
            logger.error("can not find this jobid: {}".format(job_id))
            return False
        job_node = "{}/jobs/{}/targets".format(self.root, job_id)
        for target in self.zk.get_children(job_node):
            target_node = "{}/{}/tasks".format(job_node, target)
            if task_id not in self.zk.get_children(target_node):
                continue
            task_node = "{}/{}".format(target_node, task_id)
            task_value, _ = self.zk.get(task_node)
            new_task_value = json.loads(task_value.decode())
            new_task_value['status'] = status
            tx = self.zk.transaction()
            tx.set_data(task_node, json.dumps(new_task_value).encode())
            tx.commit()
            return True
        logger.error("can not find this taskid: {} in {}".format(
            task_id, job_id))
        return False

    def get_target_by_taskid(self, job_id, task_id):
        """Return the target that owns *task_id*.

        :returns: the target name, ``False`` when no target has the task, or
            ``None`` when the job itself does not exist.
        """
        if not self.is_job_exist(job_id):
            logger.error("job is not exist: job_id={}".format(job_id))
            return None
        node = "{}/jobs/{}/targets".format(self.root, job_id)
        for target in self.zk.get_children(node):
            path = '{}/{}/tasks'.format(node, target)
            if task_id in self.zk.get_children(path):
                return target
        return False

    def send_signal(self, job_id):
        """Overwrite the signal node of *job_id* with fresh raw UUID bytes."""
        node = '{}/signal/{}'.format(self.root, job_id)
        logger.info("send singal: {}".format(job_id))
        tx = self.zk.transaction()
        tx.set_data(node, uuid.uuid4().bytes)
        tx.commit()

    # DELETE
    def delete_job(self, job_id):
        """Recursively delete the job node."""
        node = "{}/jobs/{}".format(self.root, job_id)
        logger.info("delete job: job_id={}".format(job_id))
        self._delete_node(node)

    def delete_signal(self, job_id):
        """Delete the signal node of *job_id*."""
        node = '{}/signal/{}'.format(self.root, job_id)
        logger.info("delete singal: {}".format(job_id))
        self._delete_node(node)

    def delete_target(self, job_id, target):
        """Recursively delete the node of *target* below *job_id*."""
        target_node = '{}/jobs/{}/targets/{}'.format(self.root, job_id, target)
        logger.info("delete target: job_id={}, target={}".format(
            job_id, target))
        self._delete_node(target_node)

    def delete_task(self, job_id, target, task_id):
        """Delete the node of *task_id* below *target*."""
        task_node = '{}/jobs/{}/targets/{}/tasks/{}'.format(
            self.root, job_id, target, task_id)
        logger.info("delete task: job_id ={}, target={}, task_id={}".format(
            job_id, target, task_id))
        self._delete_node(task_node)

    #################################
    # CALLBACK
    ## exist CALLBACK
    def is_exist_callback(self, callback_node):
        """Return ``True`` when the named callback node exists."""
        node = "{}/callback/{}".format(self.root, callback_node)
        return bool(self.zk.exists(node))

    ## INIT CALLBACK
    def init_callback_by_jobid(self, job_id):
        """Create the job-level callback node in its initial state."""
        node = "{}/callback/{}".format(self.root, job_id)
        callback = {
            "callback_level": CallbackLevel.job.value,
            "callback_info": {
                "job_id": job_id,
                "status": JobStatus.init.value,
                "messages": ""
            }
        }
        return self._create_node(node, callback)

    def init_callback_by_target(self, job_id, target):
        """Create the target-level callback node (``<job_id>_<target>``)."""
        node = "{}/callback/{}".format(self.root, job_id + "_" + target)
        callback = {
            "callback_level": CallbackLevel.target.value,
            "callback_info": {
                "job_id": job_id,
                "target": target,
                "status": TargetStatus.init.value,
                "messages": ""
            }
        }
        return self._create_node(node, callback)

    def init_callback_by_taskid(self, job_id, target, task_id, task_name):
        """Create the task-level callback node named after *task_id*."""
        node = "{}/callback/{}".format(self.root, task_id)
        callback = {
            "callback_level": CallbackLevel.task.value,
            "callback_info": {
                "job_id": job_id,
                "target": target,
                "task_name": task_name,
                # NOTE(review): uses the job status enum for a task - confirm.
                "status": JobStatus.init.value,
                "messages": "",
            }
        }
        return self._create_node(node, callback)

    ## GET CALLBACK
    def get_callback_info(self, callback):
        """Return the decoded callback document, or ``False`` when missing."""
        node = "{}/callback/{}".format(self.root, callback)
        if self.zk.exists(node):
            return self._get_node(node)
        return False

    ## UPDATE CALLBACK
    def update_callback_by_jobid(self, job_id, status, messages=None):
        """Update status/messages of the job-level callback node."""
        node = "{}/callback/{}".format(self.root, job_id)
        return self._update_callback(node, status, messages)

    def update_callback_by_target(self, job_id, target, status, messages=None):
        """Update status/messages of the target-level callback node."""
        node = "{}/callback/{}".format(self.root, job_id + "_" + target)
        return self._update_callback(node, status, messages)

    def update_callback_by_taskid(self, job_id, taskid, status, messages=None):
        """Update status/messages of the task-level callback node.

        ``job_id`` is accepted but not used; the node is named after *taskid*.
        """
        node = "{}/callback/{}".format(self.root, taskid)
        return self._update_callback(node, status, messages)

    ## DELETE CALLBACK
    def delete_callback_node(self, callback):
        """Delete the named callback node and log the outcome."""
        node = "{}/callback/{}".format(self.root, callback)
        ret = self._delete_node(node)
        if ret:
            logger.info(
                "delete callback node success: callback={}".format(node))
        else:
            logger.error("delete callback node fail: callback={}".format(node))
        return ret
class USSMetadataManager(object): """Interfaces with the locking system to get, put, and delete USS metadata. Metadata gets/stores/deletes the USS information for a partiular grid, including current version number, a list of USSs with active operations, and the endpoints to get that information. Locking is assured through a snapshot token received when getting, and used when putting. """ def __init__(self, connectionstring=DEFAULT_CONNECTION, testgroupid=None): """Initializes the class. Args: connectionstring: Zookeeper connection string - server:port,server:port,... testgroupid: ID to use if in test mode, none for normal mode """ if testgroupid: self.set_testmode(testgroupid) if not connectionstring: connectionstring = DEFAULT_CONNECTION log.debug( 'Creating metadata manager object and connecting to zookeeper...') try: if set(BAD_CHARACTER_CHECK) & set(connectionstring): raise ValueError self.zk = KazooClient(hosts=connectionstring, timeout=CONNECTION_TIMEOUT) self.zk.add_listener(self.zookeeper_connection_listener) self.zk.start() if testgroupid: self.delete_testdata(testgroupid) except KazooTimeoutError: log.error( 'Unable to connect to zookeeper using %s connection string...', connectionstring) raise except ValueError: log.error('Connection string %s seems invalid...', connectionstring) raise def __del__(self): log.debug( 'Destroying metadata manager object and disconnecting from zk...') self.zk.stop() def set_verbose(self): log.setLevel(logging.DEBUG) def set_testmode(self, testgroupid='UNDEFINED_TESTER'): """Sets the mode to testing with the specific test ID, cannot be undone. 
Args: testgroupid: ID to use if in test mode, none for normal mode """ global GRID_PATH global CONNECTION_TIMEOUT # Adjust parameters specifically for the test GRID_PATH = TEST_BASE_PREFIX + testgroupid + USS_BASE_PREFIX log.debug('Setting test path to %s...', GRID_PATH) CONNECTION_TIMEOUT = 1.0 def zookeeper_connection_listener(self, state): if state == KazooState.LOST: # Register somewhere that the session was lost log.error('Lost connection with the zookeeper servers...') elif state == KazooState.SUSPENDED: # Handle being disconnected from Zookeeper log.error('Suspended connection with the zookeeper servers...') elif state == KazooState.CONNECTED: # Handle being connected/reconnected to Zookeeper log.info('Connection restored with the zookeeper servers...') def delete_testdata(self, testgroupid=None): """Removes the test data from the servers. Be careful when using this in parallel as it removes everything under the testgroupid, or everything if no tetgroupid is provided. Args: testgroupid: ID to use if in test mode, none will remove all test data """ if testgroupid: path = TEST_BASE_PREFIX + testgroupid else: path = TEST_BASE_PREFIX self.zk.delete(path, recursive=True) def get(self, z, x, y): """Gets the metadata and snapshot token for a GridCell. Reads data from zookeeper, including a snapshot token. The snapshot token is used as a reference when writing to ensure the data has not been updated between read and write. Args: z: zoom level in slippy tile format x: x tile number in slippy tile format y: y tile number in slippy tile format Returns: JSend formatted response (https://labs.omniti.com/labs/jsend) """ # TODO(hikevin): Change to use our own error codes and let the server # convert them to http error codes. For now, this is # at least in a standard JSend format. 
status = 500 if slippy_util.validate_slippy(z, x, y): (content, metadata) = self._get_raw(z, x, y) if metadata: try: m = uss_metadata.USSMetadata(content) status = 200 result = { 'status': 'success', 'sync_token': metadata.last_modified_transaction_id, 'data': m.to_json() } except ValueError: status = 424 else: status = 404 else: status = 400 if status != 200: result = self._format_status_code_to_jsend(status) return result def set(self, z, x, y, sync_token, uss_id, ws_scope, operation_format, operation_ws, earliest_operation, latest_operation): """Sets the metadata for a GridCell. Writes data, using the snapshot token for confirming data has not been updated since it was last read. Args: z: zoom level in slippy tile format x: x tile number in slippy tile format y: y tile number in slippy tile format sync_token: token retrieved in the original GET GridCellMetadata, uss_id: plain text identifier for the USS, ws_scope: scope to use to obtain OAuth token, operation_format: output format for operation ws (i.e. NASA, GUTMA), operation_ws: submitting USS endpoint where all flights in this cell can be retrieved from, earliest_operation: lower bound of active or planned flight timestamp, used for quick filtering conflicts. latest_operation: upper bound of active or planned flight timestamp, used for quick filtering conflicts. 
Returns: JSend formatted response (https://labs.omniti.com/labs/jsend) """ if slippy_util.validate_slippy(z, x, y): # first we have to get the cell (content, metadata) = self._get_raw(z, x, y) if metadata: # Quick check of the token, another is done on the actual set to be sure # but this check fails early and fast if str(metadata.last_modified_transaction_id) == str( sync_token): try: m = uss_metadata.USSMetadata(content) log.debug('Setting metadata for %s...', uss_id) if not m.upsert_operator( uss_id, ws_scope, operation_format, operation_ws, earliest_operation, latest_operation, z, x, y): log.error( 'Failed setting operator for %s with token %s...', uss_id, str(sync_token)) raise ValueError status = self._set_raw(z, x, y, m, metadata.version) except ValueError: status = 424 else: status = 409 else: status = 404 else: status = 400 if status == 200: # Success, now get the metadata back to send back result = self.get(z, x, y) else: result = self._format_status_code_to_jsend(status) return result def delete(self, z, x, y, uss_id): """Sets the metadata for a GridCell by removing the entry for the USS. 
Args: z: zoom level in slippy tile format x: x tile number in slippy tile format y: y tile number in slippy tile format uss_id: is the plain text identifier for the USS Returns: JSend formatted response (https://labs.omniti.com/labs/jsend) """ status = 500 if slippy_util.validate_slippy(z, x, y): # first we have to get the cell (content, metadata) = self._get_raw(z, x, y) if metadata: try: m = uss_metadata.USSMetadata(content) m.remove_operator(uss_id) # TODO(pelletierb): Automatically retry on delete status = self._set_raw(z, x, y, m, metadata.version) except ValueError: status = 424 else: status = 404 else: status = 400 if status == 200: # Success, now get the metadata back to send back (content, metadata) = self._get_raw(z, x, y) result = { 'status': 'success', 'sync_token': metadata.last_modified_transaction_id, 'data': m.to_json() } else: result = self._format_status_code_to_jsend(status) return result def get_multi(self, z, grids): """Gets the metadata and snapshot token for multiple GridCells. Reads data from zookeeper, including a composite snapshot token. The snapshot token is used as a reference when writing to ensure the data has not been updated between read and write. Args: z: zoom level in slippy tile format grids: list of (x,y) tiles to retrieve Returns: JSend formatted response (https://labs.omniti.com/labs/jsend) """ try: combined_meta, syncs = self._get_multi_raw(z, grids) log.debug('Found sync token %s for %d grids...', self._hash_sync_tokens(syncs), len(syncs)) result = { 'status': 'success', 'sync_token': self._hash_sync_tokens(syncs), 'data': combined_meta.to_json() } except ValueError as e: result = self._format_status_code_to_jsend(400, e.message) except IndexError as e: result = self._format_status_code_to_jsend(404, e.message) return result def set_multi(self, z, grids, sync_token, uss_id, ws_scope, operation_format, operation_ws, earliest_operation, latest_operation): """Sets multiple GridCells metadata at once. 
Writes data, using the hashed snapshot token for confirming data has not been updated since it was last read. Args: z: zoom level in slippy tile format grids: list of (x,y) tiles to update sync_token: token retrieved in the original get_multi, uss_id: plain text identifier for the USS, ws_scope: scope to use to obtain OAuth token, operation_format: output format for operation ws (i.e. NASA, GUTMA), operation_ws: submitting USS endpoint where all flights in this cell can be retrieved from, earliest_operation: lower bound of active or planned flight timestamp, used for quick filtering conflicts. latest_operation: upper bound of active or planned flight timestamp, used for quick filtering conflicts. Returns: JSend formatted response (https://labs.omniti.com/labs/jsend) """ log.debug('Setting multiple grid metadata for %s...', uss_id) try: # first, get the affected grid's sync tokens m, syncs = self._get_multi_raw(z, grids) del m # Quick check of the token, another is done on the actual set to be sure # but this check fails early and fast log.debug('Found sync token %d for %d grids...', self._hash_sync_tokens(syncs), len(syncs)) if str(self._hash_sync_tokens(syncs)) == str(sync_token): log.debug('Composite sync_token matches, continuing...') self._set_multi_raw(z, grids, syncs, uss_id, ws_scope, operation_format, operation_ws, earliest_operation, latest_operation) log.debug('Completed updating multiple grids...') else: raise KeyError('Composite sync_token has changed') combined_meta, new_syncs = self._get_multi_raw(z, grids) result = { 'status': 'success', 'sync_token': self._hash_sync_tokens(new_syncs), 'data': combined_meta.to_json() } except (KeyError, RolledBackError) as e: result = self._format_status_code_to_jsend(409, e.message) except ValueError as e: result = self._format_status_code_to_jsend(400, e.message) except IndexError as e: result = self._format_status_code_to_jsend(404, e.message) return result def delete_multi(self, uss_id, z, grids): """Sets 
multiple GridCells metadata by removing the entry for the USS. Reads data from zookeeper, including a snapshot token. The snapshot token is used as a reference when writing to ensure the data has not been updated between read and write. Args: uss_id: is the plain text identifier for the USS z: zoom level in slippy tile format grids: list of (x,y) tiles to delete Returns: JSend formatted response (https://labs.omniti.com/labs/jsend) """ log.debug('Deleting multiple grid metadata for %s...', uss_id) try: if not uss_id: raise ValueError('Invalid uss_id for deleting multi') for x, y in grids: if slippy_util.validate_slippy(z, x, y): (content, metadata) = self._get_raw(z, x, y) if metadata: m = uss_metadata.USSMetadata(content) m.remove_operator(uss_id) # TODO(pelletierb): Automatically retry on delete status = self._set_raw(z, x, y, m, metadata.version) else: raise ValueError('Invalid slippy grids for lookup') result = self.get_multi(z, grids) except ValueError as e: result = self._format_status_code_to_jsend(400, e.message) return result ###################################################################### ################ INTERNAL FUNCTIONS ######################### ###################################################################### def _get_raw(self, z, x, y): """Gets the raw content and metadata for a GridCell from zookeeper. 
Args: z: zoom level in slippy tile format x: x tile number in slippy tile format y: y tile number in slippy tile format Returns: content: USS metadata metadata: straight from zookeeper """ path = '%s/%s/%s/%s/%s' % (GRID_PATH, str(z), str(x), str(y), USS_METADATA_FILE) log.debug('Getting metadata from zookeeper@%s...', path) try: c, m = self.zk.get(path) except NoNodeError: self.zk.ensure_path(path) c, m = self.zk.get(path) if c: log.debug('Received raw content and metadata from zookeeper: %s', c) if m: log.debug('Received raw metadata from zookeeper: %s', m) return c, m def _set_raw(self, z, x, y, m, version): """Grabs the lock and updates the raw content for a GridCell in zookeeper. Args: z: zoom level in slippy tile format x: x tile number in slippy tile format y: y tile number in slippy tile format m: metadata object to write version: the metadata version verified from the sync_token match Returns: 200 for success, 409 for conflict, 408 for unable to get the lock """ path = '%s/%s/%s/%s/%s' % (GRID_PATH, str(z), str(x), str(y), USS_METADATA_FILE) try: log.debug('Setting metadata to %s...', str(m)) self.zk.set(path, json.dumps(m.to_json()), version) status = 200 except BadVersionError: log.error('Sync token updated before write for %s...', path) status = 409 return status def _get_multi_raw(self, z, grids): """Gets the raw content and metadata for multiple GridCells from zookeeper. 
Args: z: zoom level in slippy tile format grids: list of (x,y) tiles to retrieve Returns: content: Combined USS metadata syncs: list of sync tokens in the same order as the grids Raises: IndexError: if it cannot find anything in zookeeper ValueError: if the grid data is not in the right format """ log.debug('Getting multiple grid metadata for %s...', str(grids)) combined_meta = None syncs = [] for x, y in grids: if slippy_util.validate_slippy(z, x, y): (content, metadata) = self._get_raw(z, x, y) if metadata: combined_meta += uss_metadata.USSMetadata(content) syncs.append(metadata.last_modified_transaction_id) else: raise IndexError('Unable to find metadata in platform') else: raise ValueError('Invalid slippy grids for lookup') if len(syncs) == 0: raise IndexError('Unable to find metadata in platform') return combined_meta, syncs def _set_multi_raw(self, z, grids, sync_tokens, uss_id, ws_scope, operation_format, operation_ws, earliest_operation, latest_operation): """Grabs the lock and updates the raw content for multiple GridCells Args: z: zoom level in slippy tile format grids: list of (x,y) tiles to retrieve sync_tokens: list of the sync tokens received during get operation uss_id: plain text identifier for the USS, ws_scope: scope to use to obtain OAuth token, operation_format: output format for operation ws (i.e. NASA, GUTMA), operation_ws: submitting USS endpoint where all flights in this cell can be retrieved from, earliest_operation: lower bound of active or planned flight timestamp, used for quick filtering conflicts. latest_operation: upper bound of active or planned flight timestamp, used for quick filtering conflicts. 
Raises: IndexError: if it cannot find anything in zookeeper ValueError: if the grid data is not in the right format """ log.debug('Setting multiple grid metadata for %s...', str(grids)) try: contents = [] for i in range(len(grids)): # First, get and update them all in memory, validate the sync_token x = grids[i][0] y = grids[i][1] sync_token = sync_tokens[i] path = '%s/%s/%s/%s/%s' % (GRID_PATH, str(z), str(x), str(y), USS_METADATA_FILE) (content, metadata) = self._get_raw(z, x, y) if str(metadata.last_modified_transaction_id) == str( sync_token): log.debug('Sync_token matches for %d, %d...', x, y) m = uss_metadata.USSMetadata(content) if not m.upsert_operator( uss_id, ws_scope, operation_format, operation_ws, earliest_operation, latest_operation, z, x, y): raise ValueError('Failed to set operator content') contents.append((path, m, metadata.version)) else: log.error( 'Sync token from USS (%s) does not match token from zk (%s)...', str(sync_token), str(metadata.last_modified_transaction_id)) raise KeyError('Composite sync_token has changed') # Now, start a transaction to update them all # the version will catch any changes and roll back any attempted # updates to the grids log.debug('Starting transaction to write all grids at once...') t = self.zk.transaction() for path, m, version in contents: t.set_data(path, json.dumps(m.to_json()), version) log.debug('Committing transaction...') results = t.commit() if isinstance(results[0], RolledBackError): raise KeyError( 'Rolled back multi-grid transaction due to grid change') log.debug('Committed transaction successfully.') except (KeyError, ValueError, IndexError) as e: log.error('Error caught in set_multi_raw %s.', e.message) raise e def _format_status_code_to_jsend(self, status, message=None): """Formats a response based on HTTP status code. 
Args: status: HTTP status code message: optional message to override preset message for codes Returns: JSend formatted response (https://labs.omniti.com/labs/jsend) """ if status == 200 or status == 204: result = { 'status': 'success', 'code': 204, 'message': 'Empty data set.' } elif status == 400: result = { 'status': 'fail', 'code': status, 'message': 'Parameters are not following the correct format.' } elif status == 404: result = { 'status': 'fail', 'code': status, 'message': 'Unable to pull metadata from lock system.' } elif status == 408: result = { 'status': 'fail', 'code': status, 'message': 'Timeout trying to get lock.' } elif status == 409: result = { 'status': 'fail', 'code': status, 'message': 'Content in metadata has been updated since provided sync token.' } elif status == 424: result = { 'status': 'fail', 'code': status, 'message': 'Content in metadata is not following JSON format guidelines.' } else: result = { 'status': 'fail', 'code': status, 'message': 'Unknown error code occurred.' } if message: result['message'] = message return result @staticmethod def _hash_sync_tokens(syncs): """Hashes a list of sync tokens into a single, positive 64-bit int""" log.debug('Hashing syncs: %s', tuple(sorted(syncs))) return abs(hash(tuple(sorted(syncs))))
class ArcusZooKeeper:
    """ZooKeeper helper for Arcus.

    Wraps a ``KazooClient`` and maintains the znodes this class reads and
    writes:

    * ``/arcus/cache_list/<service_code>``: JSON cluster definition.
    * ``/arcus/client_list/<service_code>``: empty node.
    * ``/arcus/cache_server_mapping/<ip>:<port>``: JSON node config, with
      the owning service code as its single child.
    """

    def __init__(self, hostports, timeout):
        """Create (but do not start) the client.

        Args:
            hostports: ZooKeeper connection string handed to ``KazooClient``.
            timeout: stored on the instance only; it is not passed to kazoo.
        """
        self.hostports = hostports
        self.timeout = timeout
        self.zk = KazooClient(hosts=hostports, read_only=False)

    @staticmethod
    def _to_bytes(obj):
        """Serialize ``obj`` to JSON bytes (kazoo only accepts byte values)."""
        return json.dumps(obj).encode("utf-8")

    def start(self):
        """Open the ZooKeeper connection."""
        self.zk.start()

    def stop(self):
        """Close the ZooKeeper connection."""
        self.zk.stop()

    def init_structure(self):
        """Create the empty ``/arcus`` tree in a single transaction.

        Returns:
            True when every node was created, False when ``/arcus`` already
            exists or any operation of the transaction failed.
        """
        if self.zk.exists("/arcus"):
            print("init_arcus_structure: fail (/arcus exists)")
            return False

        tx = self.zk.transaction()
        tx.create("/arcus", b"")
        tx.create("/arcus/cache_list", b"")
        tx.create("/arcus/client_list", b"")
        tx.create("/arcus/cache_server_mapping", b"")
        results = tx.commit()

        # commit() yields one entry per operation: the created path on
        # success, an exception instance on failure.  A non-empty list is
        # therefore the normal case and must not be read as an error.
        if any(isinstance(item, Exception) for item in results):
            print(results)
            return False

        print("init_structure: success")
        return True

    def drop_structure(self):
        """Recursively delete the whole ``/arcus`` tree."""
        self.zk.delete("/arcus", recursive=True)
        print("delete_structure: success")

    def get_structure(self):
        """Return the names of the direct children of ``/arcus``."""
        return self.zk.get_children("/arcus")

    def get_mapping_for_service(self, service_code):
        """Return mapping node paths whose first child is ``service_code``.

        Args:
            service_code: service code to look for.

        Returns:
            List of ``/arcus/cache_server_mapping/<ip:port>`` paths.
        """
        mapping = "/arcus/cache_server_mapping"
        result = []
        for ipport in self.zk.get_children(mapping):
            node = "%s/%s" % (mapping, ipport)
            codes = self.zk.get_children(node)
            if codes and codes[0] == service_code:
                result.append(node)
        return result

    def get_config_for_service(self, service_code):
        """Read the cluster definition of a service.

        Returns:
            Tuple ``(parsed_json, raw_data, stat)`` for
            ``/arcus/cache_list/<service_code>``.
        """
        cache_list = "/arcus/cache_list/%s" % service_code
        data, stat = self.zk.get(cache_list)
        return json.loads(data), data, stat

    def update_service_code(self, cluster):
        """Create or replace a service definition and its server mappings.

        ``cluster`` is mutated: ``created`` and (on update) ``modified`` are
        filled in before it is stored.  All writes go through one
        transaction whose results are printed.  Any exception is printed via
        ``traceback`` and swallowed (best effort, as before).

        Args:
            cluster: dict with at least ``serviceCode`` and ``servers``;
                optional ``config`` holds defaults merged under each
                server's own ``config``.
        """
        service_code = cluster["serviceCode"]
        cache_list = "/arcus/cache_list/%s" % service_code
        client_list = "/arcus/client_list/%s" % service_code

        try:
            delete_list = self.get_mapping_for_service(service_code)

            # 0. Create a transaction
            tx = self.zk.transaction()

            # 1. Cache list
            if self.zk.exists(cache_list):
                existing, _raw, _stat = self.get_config_for_service(service_code)
                cluster["created"] = existing.get("created")
                cluster["modified"] = str(datetime.datetime.now())
                tx.set_data(cache_list, self._to_bytes(cluster))
            else:
                cluster["created"] = str(datetime.datetime.now())
                tx.create(cache_list, self._to_bytes(cluster))

            # 2. Client list
            if not self.zk.exists(client_list):
                tx.create(client_list, b"")

            # 3. Mapping: drop the old entries (child first), then recreate
            for each in delete_list:
                tx.delete("%s/%s" % (each, service_code))
                tx.delete(each)

            global_config = cluster.get("config", {})
            for server in cluster["servers"]:
                # Per-node settings override the cluster-wide defaults.
                config = dict(global_config)
                config.update(server.get("config", {}))
                if not config:
                    print("update_service_code: config not found for {0}".format(server))
                    continue

                map_ip = "/arcus/cache_server_mapping/%s:%s" % (server["ip"], config["port"])
                map_code = "%s/%s" % (map_ip, service_code)
                tx.create(map_ip, self._to_bytes(config))
                tx.create(map_code, b"")

            # 4. Commit
            results = tx.commit()
            print(results)
        except Exception:
            traceback.print_exc()

    def delete_service_code(self, cluster):
        """Delete a service's cache list, client list and server mappings.

        Runs as one transaction whose results are printed; exceptions are
        printed via ``traceback`` and swallowed.

        Args:
            cluster: dict containing ``serviceCode``.
        """
        service_code = cluster["serviceCode"]

        try:
            delete_list = self.get_mapping_for_service(service_code)

            # 0. Create a transaction
            tx = self.zk.transaction()

            # 1. Cache list
            tx.delete("/arcus/cache_list/%s" % service_code)

            # 2. Client list
            tx.delete("/arcus/client_list/%s" % service_code)

            # 3. Mapping (child node first, then its parent)
            for each in delete_list:
                tx.delete("%s/%s" % (each, service_code))
                tx.delete(each)

            # 4. Commit
            results = tx.commit()
            print(results)
        except Exception:
            traceback.print_exc()

    def list_all_service_code(self):
        """Return ``list_service_code`` output for every known service.

        Returns:
            List of status dicts, or None when listing the children failed
            (the traceback is printed).
        """
        try:
            service_codes = self.zk.get_children("/arcus/cache_list")
            return [self.list_service_code(code) for code in service_codes]
        except Exception:
            traceback.print_exc()

    def list_service_code(self, service_code):
        """Describe one service: stored config plus node availability.

        A server counts as *online* when it is both mapped to the service
        and present under the service's cache list, *offline* when it is
        mapped only, and *undefined* when it is present but not mapped.

        Returns:
            Dict with ``serviceCode``, ``config``, ``online``, ``offline``,
            ``undefined``, ``created`` and ``modified``; None on error (the
            traceback is printed).
        """
        cache_list = "/arcus/cache_list/%s" % service_code

        try:
            data, _stat = self.zk.get(cache_list)
            static_list = self.get_mapping_for_service(service_code)
            current_list = self.zk.get_children(cache_list)

            # get clusterConfig
            cluster = json.loads(data)

            # get clusterStatus
            static_set = {each.split("/")[-1] for each in static_list}
            # Only the part of the child name before the first "-" is compared.
            current_set = {each.split("-")[0] for each in current_list}
            offline = static_set - current_set
            online = static_set - offline
            undefined = current_set - static_set

            return {
                "serviceCode": service_code,
                "config": cluster,
                "online": list(online),
                "offline": list(offline),
                "undefined": list(undefined),
                "created": cluster.get("created"),
                "modified": cluster.get("modified"),
            }
        except Exception:
            traceback.print_exc()
class Applier:
    """Promote the "next" distribution target to "current" once it is ready.

    The target id stored at ``ZK_NEXT_TARGET`` is checked once at start-up
    and then every minute; when every partition below
    ``/phrases/distributor/<id>`` has enough registered nodes, the id is
    moved to ``ZK_CURRENT_TARGET``.
    """

    def __init__(self):
        """Build the ZooKeeper client and a stream logger from the environment."""
        zk_host = os.getenv("ZOOKEEPER_HOST")
        self._zk = KazooClient(hosts=f'{zk_host}:2181')

        log_level = logging.getLevelName(os.getenv("LOG_LEVEL", "INFO"))
        self._logger = logging.getLogger(__name__)
        self._logger.setLevel(log_level)
        stream_handler = logging.StreamHandler()
        stream_handler.setLevel(log_level)
        self._logger.addHandler(stream_handler)

    def start(self):
        """Connect, try once immediately, then retry every minute (blocks)."""
        self._logger.debug("Applier started")
        self._zk.start()
        self._attempt_to_apply_next_target()

        scheduler = BlockingScheduler(timezone="UTC")
        scheduler.add_job(
            self._attempt_to_apply_next_target,
            'interval',
            minutes=1,
        )
        scheduler.start()

    def stop(self):
        """Close the ZooKeeper connection."""
        self._zk.stop()

    def _attempt_to_apply_next_target(self):
        """Apply the next target, but only when it is ready."""
        if not self._is_next_target_ready():
            return
        self._apply_next_target()

    def _apply_next_target(self):
        """Copy the next target id to the current node and blank the next node.

        Both writes are issued in a single transaction.
        """
        self._logger.info("Applying next target")
        self._zk.ensure_path(ZK_CURRENT_TARGET)
        pending_id, _stat = self._zk.get(ZK_NEXT_TARGET)

        swap = self._zk.transaction()
        swap.set_data(ZK_NEXT_TARGET, b'')
        swap.set_data(ZK_CURRENT_TARGET, pending_id)
        swap.commit()

    def _is_next_target_ready(self):
        """Return True when the next target is fully populated.

        Ready means: the next-target node exists and holds a non-empty id,
        the target node exists, it has at least one partition, and every
        partition has at least ``NUMBER_NODES_PER_PARTITION`` nodes whose
        stored hostname is non-empty.
        """
        if self._zk.exists(ZK_NEXT_TARGET) is None:
            return False

        target_id = self._zk.get(ZK_NEXT_TARGET)[0].decode()
        if not target_id:
            return False
        if self._zk.exists(f'/phrases/distributor/{target_id}') is None:
            return False

        partition_names = self._zk.get_children(
            f'/phrases/distributor/{target_id}/partitions')
        if not partition_names:
            return False

        for partition_name in partition_names:
            nodes_path = f'/phrases/distributor/{target_id}/partitions/{partition_name}/nodes'
            node_names = self._zk.get_children(nodes_path)
            if len(node_names) < NUMBER_NODES_PER_PARTITION:
                return False
            # all() stops at the first node without a hostname.
            if not all(
                self._zk.get(f'{nodes_path}/{node_name}')[0].decode()
                for node_name in node_names
            ):
                return False

        return True
class Node(object):
    """A pool member that competes for leadership of the pool's resources.

    Leadership of a resource is represented by an ephemeral znode at
    ``<pool.path>/leaders/<resource>`` whose data is the owning node's id.
    The node joins the pool through an ephemeral sequential znode under
    ``<pool.path>/nodes/`` and reacts to leader changes through kazoo
    watches.

    NOTE(review): ``unicode`` is used below, so this targets Python 2.
    """

    def __init__(self, pool, metadata=None, max_inflight_acquires=1, auto_acquire=True):
        """Connect to ZooKeeper and start the background balancing greenlet.

        Args:
            pool: object providing ``hosts``, ``path``, ``resources`` and
                ``nodes`` (all read by this class).
            metadata: optional dict stored on the node; defaults to ``{}``.
            max_inflight_acquires: initial value of the semaphore guarding
                takeover attempts.
            auto_acquire: when False, ``_check_for_takeover`` does nothing.

        Raises:
            Exception: if the client is not connected after waiting up to
                five seconds.
        """
        self.pool = pool
        self.zk = KazooClient(pool.hosts, timeout=5, handler=SequentialGeventHandler())
        event = self.zk.start_async()
        event.wait(timeout=5)
        if not self.zk.connected:
            # Stop the client before raising so it does not keep retrying.
            self.zk.stop()
            raise Exception('Failed to reach zookeeper')

        self.metadata = metadata or {}
        # Both are assigned in join().
        self.id = None
        self.path = None
        self.auto_acquire = auto_acquire
        # 0 disables the cap checked in _try_takeover().
        self.max_resources = 0

        # Set of resources we own
        self.resources = set()
        # resource name -> time.time() of when we last gave it up
        self._resource_backoff = {}
        self._resources_acquiring = gevent.lock.Semaphore(max_inflight_acquires)

        # Callbacks
        self.on_acquire_resource = None
        self.on_release_resource = None

        self._anti_entropy_greenlet = gevent.spawn(self._anti_entropy)

    def disconnect(self):
        """Disconnect the underlying client.

        NOTE(review): confirm the client really exposes ``disconnect()``;
        the rest of this class uses ``stop()``.
        """
        self.zk.disconnect()

    def acquire(self, resource):
        """Forcefully take leadership of ``resource``; returns True on success."""
        assert resource in self.pool.resources
        return self._try_takeover(resource, force=True)

    def release(self, resource):
        """Delete our leader znode for ``resource``.

        ``self.resources`` is not modified here; it is updated by
        ``_on_resource_leader_change``.
        """
        assert resource in self.resources
        # TODO: transaction here
        self.zk.delete(os.path.join(self.pool.path, 'leaders', resource))

    def leave(self):
        """Release every resource this node currently owns."""
        # Iterate over a copy of the set.
        for resource in list(self.resources):
            self.release(resource)

    def join(self):
        """Register this node in the pool and start watching leaders."""
        path = self.zk.create(
            os.path.join(self.pool.path, 'nodes', ''), ephemeral=True, sequence=True)
        self.path = path
        # The id is the last path component of the created znode.
        self.id = path.rsplit('/', 1)[-1]

        # Watch for leadership changes so we can possibly take over
        ChildrenWatch(self.zk, os.path.join(self.pool.path, 'leaders'), self._on_leaders_change)

        # Now that we've joined, lets see if there are any dangling resources we
        # can take ownership of
        gevent.spawn(self._check_for_takeover, delay=0)

    def _on_leaders_change(self, data):
        """ChildrenWatch callback: schedule a delayed takeover check."""
        # TODO: debounce this instead of just sleeping
        gevent.spawn(self._check_for_takeover, delay=5)

    def _on_resource_leader_change(self, data, stat, event):
        """DataWatch callback for a single leader znode.

        Returns:
            False after recording that we lost the resource; None otherwise.
            NOTE(review): presumably the False return is what stops the
            kazoo watch; confirm against the DataWatch documentation.
        """
        # Invocations without an event are ignored.
        if not event:
            return

        resource_name = event.path.split('/')[-1]
        if resource_name not in self.pool.resources:
            return

        if resource_name in self.resources:
            # We thought we owned it, but the node is gone or names someone else.
            if event.type == 'DELETED' or data != self.id:
                self._resource_backoff[resource_name] = time.time()
                self.resources.remove(resource_name)
                if callable(self.on_release_resource):
                    self.on_release_resource(self, resource_name)
                # NOTE(review): nesting of this return was reconstructed; confirm.
                return False

        if event.type == 'DELETED':
            self._try_takeover(resource_name)

    def _check_for_takeover(self, delay=5):
        """After ``delay`` seconds, try to claim every leaderless resource."""
        if not self.auto_acquire:
            return

        time.sleep(delay)
        resources_with_leaders = set(self.zk.get_children(os.path.join(self.pool.path, 'leaders')))
        resources_without_leaders = self.pool.resources - resources_with_leaders

        for resource in resources_without_leaders:
            self._try_takeover(resource)

            # If we have more than the even-split number of resources, backoff a bit
            # NOTE(review): divides by len(self.pool.nodes); confirm it is never 0.
            if len(self.resources) > len(self.pool.resources) / len(self.pool.nodes):
                time.sleep(1)

    def _try_takeover(self, resource, force=False):
        """Try to become leader of ``resource``.

        Args:
            resource: resource name (znode name under ``leaders``).
            force: ignore the backoff window and replace an existing leader
                znode instead of giving up.

        Returns:
            True when leadership was obtained, False otherwise.
        """
        # Respect the optional cap on owned resources.
        if self.max_resources and len(self.resources) >= self.max_resources:
            return False

        # Skip resources given up less than 10 seconds ago.
        if not force and resource in self._resource_backoff:
            if time.time() - self._resource_backoff[resource] < 10:
                return False
            del self._resource_backoff[resource]

        # Do not queue behind an in-flight acquire; just give up.
        if self._resources_acquiring.locked():
            return False

        with self._resources_acquiring:
            path = os.path.join(self.pool.path, 'leaders', resource)

            try:
                self.zk.create(path, unicode.encode(self.id), ephemeral=True)
            except NodeExistsError:
                if not force:
                    return False

                # Replace the existing leader node, guarded by its version.
                _, metadata = self.zk.get(path)
                transaction = self.zk.transaction()
                transaction.delete(path, version=metadata.version)
                transaction.create(path, unicode.encode(self.id), ephemeral=True)
                result = transaction.commit()
                # Expect True for the delete and the path for the create.
                if result[0] is not True or result[1] != path:
                    return False

            DataWatch(self.zk, path, self._on_resource_leader_change)
            self.resources.add(resource)

            if callable(self.on_acquire_resource):
                self.on_acquire_resource(self, resource)

        return True

    def balance(self):
        """Release one random resource when we own noticeably too many.

        "Too many" means more than ``ceil(resources / nodes) + 1``.
        """
        # The "* 1.0" forces float division before ceil().
        threshold = math.ceil(len(self.pool.resources) / (len(self.pool.nodes) * 1.0))
        our_value = len(self.resources)

        if our_value > threshold + 1:
            resource = random.choice(list(self.resources))
            # Record the backoff first so we do not immediately re-acquire it.
            self._resource_backoff[resource] = time.time()
            self.release(resource)

    def _anti_entropy(self):
        """Background loop: rebalance every 10 seconds, forever."""
        while True:
            time.sleep(10)
            self.balance()
class ZookClient(object):
    """Publishes DSO account, service and mapping data into ZooKeeper.

    Everything lives below ``/dso/``: account trees under ``accounts/`` and
    lookup tables (``Ip2User/``, ``Mac2User/``, ``Ip2Vm/``, ``Aid2Aname/``,
    ``VmInfo/``) under ``Mapping/``.  Node payloads are UTF-8 encoded JSON
    unless noted otherwise.
    """

    def __init__(self, zookeeper_host=None):
        """Connect to ZooKeeper.

        Args:
            zookeeper_host: connection string; defaults to
                ``"localhost:2181"`` when None.
        """
        logging.basicConfig()
        # Create a client and start it
        if zookeeper_host is None:
            zookeeper_host = "localhost:2181"
        self.zk = KazooClient(zookeeper_host)
        self.zk.start()
        self.tran = self.zk.transaction()

        self.CONST_BASE_PATH = "/dso/"
        self.CONST_ACCOUNTS_PATH = "accounts/"
        self.CONST_GROUPS_PATH = "groups/"
        self.CONST_USERS_PATH = "users/"
        self.CONST_HOSTS_PATH = "hosts/"
        self.CONST_SERVERS_PATH = "servers/"
        self.CONST_SERVICES_PATH = "services/"
        self.CONST_INSTANCES_PATH = "instances/"
        self.CONST_MAPPING_PATH = "Mapping/"
        self.CONST_IP2USER_PATH = "Ip2User/"
        self.CONST_MAC2USER_PATH = "Mac2User/"
        self.CONST_IP2VMPATH = "Ip2Vm/"
        self.CONST_USER2ACCOUNT_PATH = "User2Account/"
        self.CONST_AID2ANAME_PATH = "Aid2Aname/"
        self.CONST_VM_INFO_PATH = "VmInfo/"
        # service name -> value stored as "user" on each instance
        self.CONST_HOST_MAPPING = dict(vrouter='fedora',
                                       dns='ubuntu',
                                       firewall='ubuntu',
                                       ipsecvpn='centos',
                                       vpc='ubuntu')

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _encode(data):
        """Serialize ``data`` to UTF-8 encoded JSON bytes."""
        return json.dumps(data).encode('utf8')

    def _put_json(self, path, data):
        """Make sure ``path`` exists, then store ``data`` there as JSON."""
        self.zk.ensure_path(path)
        self.zk.set(path, self._encode(data))

    def _mapping_root(self):
        """Return the ``/dso/Mapping/`` base path."""
        return self.CONST_BASE_PATH + self.CONST_MAPPING_PATH

    def _write_account_mapping(self, table_path, account_info, mapping, none_message):
        """Write one ``<table>/<account id>/<key>`` lookup table.

        The account node stores ``{"account_name": ...}``; every non-None
        key of ``mapping`` becomes a child holding its value.  None keys are
        reported with ``none_message`` and skipped.
        """
        self.zk.ensure_path(table_path)
        account_path = table_path + account_info.id + "/"
        self._put_json(account_path, {"account_name": account_info.account_name})
        for key, value in mapping.items():
            if key is None:
                print(none_message)
                continue
            self._put_json(account_path + key, value)

    def _instance_summary_html(self, instance, service_name):
        """Render one instance as the HTML table used by the services summary."""
        rows = (
            ("manageip", instance.manageip),
            ("publicip", instance.publicip),
            ("publicgateway", instance.publicgateway),
            ("publicnetmask", instance.publicnetmask),
            ("serviceip", instance.serviceip),
            ("status", instance.status),
            ("user", self.CONST_HOST_MAPPING[service_name]),
        )
        body = "".join(
            "<tr><td>" + label + "</td><td>" + self.check_none(value) + "</td></tr>"
            for label, value in rows
        )
        return "<table class=\"table\">" + body + "</table>"

    # ------------------------------------------------------------------
    # account tree
    # ------------------------------------------------------------------
    def create_accounts_path(self, accounts, **kwargs):
        """Create ``accounts/<id>`` for each account, storing ``to_str()``.

        Args:
            accounts: iterable of objects with ``id`` and ``to_str()``.
            **kwargs: accepted for compatibility; unused.
        """
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        self.zk.ensure_path(accounts_path)
        for account in accounts:
            path = accounts_path + account.id
            self.zk.ensure_path(path)
            # Not JSON: the account's own string form is stored.
            self.zk.set(path, account.to_str().encode('utf8'))

    def create_account_path(self, account_info):
        """Create ``accounts/<id>`` holding ``{"id": ..., "name": ...}``.

        :param account_info: object with ``id`` and ``account_name``.
        """
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        # ensure_path is a no-op when the node already exists.
        self.zk.ensure_path(accounts_path)
        account_data = dict(id=account_info.id, name=account_info.account_name)
        self._put_json(accounts_path + account_info.id, account_data)

    def create_accountinfo_path(self, account_info):
        """Write the groups and services sub-trees of one account.

        A missing account node is only reported; the sub-trees are still
        created (which also creates the account node).
        """
        account_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH + account_info.id + "/"
        if not self.zk.exists(account_path):
            print("Path" + account_path + " does not exist.")

        # create groups path
        groups_path = account_path + self.CONST_GROUPS_PATH
        self.zk.ensure_path(groups_path)
        # set each group
        for group_item in account_info.groups:
            self.create_usergroup_path(groups_path, group_item)

        # create service path
        services_path = account_path + self.CONST_SERVICES_PATH
        self.zk.ensure_path(services_path)
        # set services summary
        self.gen_services_summay(services_path, account_info.services)
        # set each service
        for service_item in account_info.services:
            self.create_service_path(services_path, service_item)

    def create_service_path(self, parent_path, service):
        """Write ``<parent>/<servicename>/`` and its ``instances/`` children.

        Does nothing when ``service`` or its name is None.
        """
        if service is None or service.servicename is None:
            return
        service_path = parent_path + service.servicename + "/"
        service_data = dict(id=service.id, servicename=service.servicename)
        self._put_json(service_path, service_data)

        # create instances path
        instances_path = service_path + self.CONST_INSTANCES_PATH
        self.zk.ensure_path(instances_path)
        # set each instance
        for instance_item in service.instances:
            self.create_instance_path(instances_path, instance_item, service.servicename)

    def create_instance_path(self, parent_path, instance, service_name):
        """Write one instance node.

        Returns:
            The instance path, or None when ``instance.id`` is None.

        Raises:
            KeyError: if ``service_name`` is not in ``CONST_HOST_MAPPING``.
        """
        if instance.id is None:
            print("instance.id is None")
            return
        instance_path = parent_path + instance.id
        instance_data = dict(id=instance.id,
                             mac=instance.mac,
                             manageip=instance.manageip,
                             publicip=instance.publicip,
                             publicgateway=instance.publicgateway,
                             publicnetmask=instance.publicnetmask,
                             serviceip=instance.serviceip,
                             status=instance.status,
                             user=self.CONST_HOST_MAPPING[service_name])
        self._put_json(instance_path, instance_data)
        return instance_path

    def create_usergroup_path(self, parent_path, group):
        """Write ``<parent>/<groupname>/`` with ``users/`` and ``servers/``.

        Does nothing when ``group`` or its name is None.
        """
        if group is None or group.groupname is None:
            return
        group_path = parent_path + group.groupname + "/"
        group_data = dict(id=group.id, groupname=group.groupname,
                          description=group.description)
        self._put_json(group_path, group_data)

        # create users path
        users_path = group_path + self.CONST_USERS_PATH
        self.zk.ensure_path(users_path)
        # set each user
        for user_item in group.users:
            self.create_user_path(users_path, user_item)

        # create servers path
        servers_path = group_path + self.CONST_SERVERS_PATH
        self.zk.ensure_path(servers_path)
        # set each server/host
        for server in group.servers:
            self.create_host_path(servers_path, server)

    def create_user_path(self, parent_path, user):
        """Write ``<parent>/<user name>/`` and its ``hosts/`` children.

        Does nothing (beyond a message) when ``user`` or its name is None.
        """
        if user is None or user.name is None:
            print("user is None")
            return
        user_path = parent_path + user.name + "/"
        user_data = dict(id=user.id, name=user.name, email=user.email)
        self._put_json(user_path, user_data)

        # create hosts path
        hosts_path = user_path + self.CONST_HOSTS_PATH
        self.zk.ensure_path(hosts_path)
        # set each hosts
        for host_item in user.hosts:
            self.create_host_path(hosts_path, host_item)

    def create_host_path(self, parent_path, host):
        """Write ``<parent>/<mac>`` describing one host.

        Returns:
            The host path, or None when ``host`` or its MAC is None.
        """
        if host is None or host.mac is None:
            return
        host_path = parent_path + host.mac
        host_data = dict(ip=host.ip, mac=host.mac,
                         host_name=host.host_name, domain=host.domain)
        self._put_json(host_path, host_data)
        return host_path

    # ------------------------------------------------------------------
    # deletion
    # ------------------------------------------------------------------
    def delete_dso_path(self):
        """Recursively delete ``/dso`` and recreate it empty."""
        self.zk.delete("/dso", recursive=True)
        self.zk.ensure_path("/dso")

    def delete_account_path(self, account_id):
        """Recursively delete ``accounts/<account_id>``."""
        account_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH + account_id
        self.zk.delete(account_path, recursive=True)

    def delete_account_mapping_path(self, account_id):
        """Recursively delete the account's Ip2Vm, Ip2User and Mac2User nodes."""
        mapping_root = self._mapping_root()
        for table in (self.CONST_IP2VMPATH,
                      self.CONST_IP2USER_PATH,
                      self.CONST_MAC2USER_PATH):
            self.zk.delete(mapping_root + table + account_id, recursive=True)

    @staticmethod
    def check_none(data):
        """Return the string ``"None"`` for None, otherwise ``data`` unchanged."""
        return "None" if data is None else data

    # ------------------------------------------------------------------
    # mapping tables
    # ------------------------------------------------------------------
    def create_user2account_path(self, user2account):
        """Store the user-id to account-id dict at ``Mapping/User2Account/``."""
        path = self._mapping_root() + self.CONST_USER2ACCOUNT_PATH
        self._put_json(path, user2account)

    def create_aid2aname_path(self, aid2aname):
        """Store the account-id to account-name dict at ``Mapping/Aid2Aname/``."""
        # ensure_path creates the missing parents as well.
        path = self._mapping_root() + self.CONST_AID2ANAME_PATH
        self._put_json(path, aid2aname)

    def create_ip2user_path(self, account_info, ip2user):
        """Write ``Mapping/Ip2User/<account id>/<ip>`` for every entry."""
        self._write_account_mapping(
            self._mapping_root() + self.CONST_IP2USER_PATH,
            account_info, ip2user, "Ip is None")

    def create_mac2user_path(self, account_info, mac2user):
        """Write ``Mapping/Mac2User/<account id>/<mac>`` for every entry."""
        self._write_account_mapping(
            self._mapping_root() + self.CONST_MAC2USER_PATH,
            account_info, mac2user, "mac is None")

    def gen_mapping_pre_account(self, account_info, vpn_clients):
        """Build and publish the Ip2User, Mac2User and Ip2Vm tables of an account.

        Args:
            account_info: account with ``groups`` (each having ``users`` with
                ``hosts``) and ``services``.
            vpn_clients: iterable of objects with ``user_name``, ``group``
                and ``ip``; a ``/suffix`` on the ip is dropped.
        """
        mapping_root = self._mapping_root()
        self.zk.ensure_path(mapping_root)
        self.zk.ensure_path(mapping_root + self.CONST_IP2VMPATH)
        self.zk.ensure_path(mapping_root + self.CONST_MAC2USER_PATH)

        user2account = {}
        ip2user = {}
        mac2user = {}
        for group in account_info.groups:
            for user in group.users:
                user2account[user.id] = account_info.id
                for host in user.hosts:
                    user_data = dict(userid=user.id, username=user.name,
                                     useremail=user.email,
                                     groupname=group.groupname)
                    if host.ip is not None:
                        ip2user[host.ip] = user_data
                    mac2user[host.mac] = user_data

        # add vpn info
        for vpn in vpn_clients:
            vpn_user_data = dict(userid='vpn_user', username=vpn.user_name,
                                 useremail='', groupname=vpn.group)
            if vpn.ip is not None:
                # Keep only the address part of "addr/prefix".
                ip2user[vpn.ip.split("/")[0]] = vpn_user_data

        # create node
        self.create_ip2user_path(account_info, ip2user)
        # create node mac2user
        self.create_mac2user_path(account_info, mac2user)
        # not used now
        # self.create_user2account_path(user2account)
        # create ip2vm
        self.create_ip2vm_path(account_info)

    def gen_vm_path(self, vm_info):
        """Write ``Mapping/VmInfo/<manage ip>`` for every VM.

        ``/`` in the manage ip is replaced by ``-`` to form the node name.
        """
        vms_path = self._mapping_root() + self.CONST_VM_INFO_PATH
        self.zk.ensure_path(vms_path)
        for vm in vm_info:
            vm_node_path = vms_path + vm.manage_ip.replace('/', '-')
            vm_dict = dict(id=vm.id, type=vm.type, status=vm.status)
            self._put_json(vm_node_path, vm_dict)

    def create_mapping_accounts(self, accounts):
        """Publish the account-id to account-name table for ``accounts``."""
        aid2aname = {}
        for account in accounts:
            aid2aname[account.id] = account.account_name
        self.create_aid2aname_path(aid2aname)

    def gen_services_summay(self, path, services):
        """Store a per-service HTML summary of all instances at ``path``.

        The stored JSON maps each service name to the concatenation of one
        HTML table per instance, each followed by a newline.  ``path`` must
        already exist.

        Raises:
            KeyError: if a service with instances is not in
                ``CONST_HOST_MAPPING``.
        """
        services_data = {}
        for service_item in services:
            services_data[service_item.servicename] = "".join(
                self._instance_summary_html(instance, service_item.servicename) + "\n"
                for instance in service_item.instances
            )
        self.zk.set(path, self._encode(services_data))

    def create_ip2vm_path(self, account_info):
        """Create ``Mapping/Ip2Vm/<account id>/<manage ip>`` nodes.

        Uses ``create`` rather than ``ensure_path``/``set``, so existing
        nodes are not overwritten (kazoo raises instead).  Instances without
        a manage ip are skipped; a ``/suffix`` on the ip is dropped from the
        node name.
        """
        base_path = self._mapping_root() + self.CONST_IP2VMPATH
        path = base_path + account_info.id + "/"
        account_data = dict(accountname=account_info.account_name)
        self.zk.create(path, self._encode(account_data))
        self.zk.ensure_path(path)

        for service_item in account_info.services:
            service_name = service_item.servicename
            host_name = account_info.id + '-' + service_name
            for instance in service_item.instances:
                if instance.manageip is None:
                    continue
                vm_path = path + instance.manageip.split('/')[0]
                instance_data = dict(id=instance.id,
                                     mac=instance.mac,
                                     manageip=instance.manageip,
                                     publicip=instance.publicip,
                                     publicgateway=instance.publicgateway,
                                     publicnetmask=instance.publicnetmask,
                                     serviceip=instance.serviceip,
                                     status=instance.status,
                                     servicename=service_name,
                                     hostname=host_name)
                self.zk.create(vm_path, self._encode(instance_data))

    def get_all_account(self):
        """Return the names of the children of ``/dso/accounts/``."""
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        return self.zk.get_children(accounts_path)

    def commit(self):
        """Commit the transaction object created in ``__init__``."""
        self.tran.commit()

    def stopZooK(self):
        """Stop the ZooKeeper client."""
        # In the end, stop it
        self.zk.stop()
class ZookClient(object):
    """Write DSO account data and lookup mappings into a ZooKeeper tree.

    All nodes live below ``/dso/``: account data under
    ``/dso/accounts/<account id>/`` and lookup tables under
    ``/dso/Mapping/``.  Unless stated otherwise a node's payload is a
    UTF-8 encoded JSON document.
    """

    # Path segments; each ends with "/" so they can simply be concatenated.
    CONST_BASE_PATH = "/dso/"
    CONST_ACCOUNTS_PATH = "accounts/"
    CONST_GROUPS_PATH = "groups/"
    CONST_USERS_PATH = "users/"
    CONST_HOSTS_PATH = "hosts/"
    CONST_SERVERS_PATH = "servers/"
    CONST_SERVICES_PATH = "services/"
    CONST_INSTANCES_PATH = "instances/"
    CONST_MAPPING_PATH = "Mapping/"
    CONST_IP2USER_PATH = "Ip2User/"
    CONST_MAC2USER_PATH = "Mac2User/"
    CONST_IP2VMPATH = "Ip2Vm/"
    CONST_USER2ACCOUNT_PATH = "User2Account/"
    CONST_AID2ANAME_PATH = "Aid2Aname/"
    CONST_VM_INFO_PATH = "VmInfo/"
    # Value published as the "user" field for each known service name.
    CONST_HOST_MAPPING = dict(
        vrouter="fedora", dns="ubuntu", firewall="ubuntu", ipsecvpn="centos", vpc="ubuntu"
    )

    def __init__(self, zookeeper_host=None):
        """Create a kazoo client, start it and open a transaction.

        :param zookeeper_host: host string handed to ``KazooClient``;
            ``"localhost:2181"`` is used when it is ``None``.
        """
        logging.basicConfig()
        if zookeeper_host is None:
            zookeeper_host = "localhost:2181"
        self.zk = KazooClient(zookeeper_host)
        self.zk.start()
        self.tran = self.zk.transaction()
        # Keep a per-instance copy, as before, so that editing one client's
        # mapping never affects another client.
        self.CONST_HOST_MAPPING = dict(type(self).CONST_HOST_MAPPING)

    # ------------------------------------------------------------------
    # small internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _to_json_bytes(data):
        """Serialize *data* to the UTF-8 JSON payload stored in a node."""
        return json.dumps(data).encode("utf8")

    def _store_json(self, path, data):
        """Make sure *path* exists, then overwrite its payload with *data*."""
        self.zk.ensure_path(path)
        self.zk.set(path, self._to_json_bytes(data))

    def _mapping_root(self):
        """Return the root path of all lookup tables (``/dso/Mapping/``)."""
        return self.CONST_BASE_PATH + self.CONST_MAPPING_PATH

    # ------------------------------------------------------------------
    # account tree
    # ------------------------------------------------------------------
    def create_accounts_path(self, accounts, **kwargs):
        """Create one node per account, keyed by ``account.id``.

        :param accounts: iterable of objects exposing ``id`` and ``to_str()``;
            the ``to_str()`` result is stored as the node payload.
        :param kwargs: accepted for compatibility and ignored.
        """
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        self.zk.ensure_path(accounts_path)
        for account in accounts:
            path = accounts_path + account.id
            self.zk.ensure_path(path)
            self.zk.set(path, account.to_str().encode("utf8"))

    def create_account_path(self, account_info):
        """
        the path to a account
        :param account_info: object exposing ``id`` and ``account_name``
        """
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        # ensure_path is a no-op for an existing node, so no exists() probe.
        self.zk.ensure_path(accounts_path)
        account_data = dict(id=account_info.id, name=account_info.account_name)
        self._store_json(accounts_path + account_info.id, account_data)

    def create_accountinfo_path(self, account_info):
        """Publish the groups and services of one account.

        A message is printed when the account node is missing, but the
        sub-tree is created regardless.

        :param account_info: object exposing ``id``, ``groups`` and
            ``services``.
        """
        account_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH + account_info.id + "/"
        if not self.zk.exists(account_path):
            print("Path" + account_path + " does not exist.")

        groups_path = account_path + self.CONST_GROUPS_PATH
        self.zk.ensure_path(groups_path)
        for group_item in account_info.groups:
            self.create_usergroup_path(groups_path, group_item)

        services_path = account_path + self.CONST_SERVICES_PATH
        self.zk.ensure_path(services_path)
        # The services node itself carries the HTML summary of all services.
        self.gen_services_summay(services_path, account_info.services)
        for service_item in account_info.services:
            self.create_service_path(services_path, service_item)

    def create_service_path(self, parent_path, service):
        """Create ``<parent_path><servicename>/`` and its instance nodes.

        Does nothing when *service* or its ``servicename`` is ``None``.
        """
        if service is None or service.servicename is None:
            return
        service_path = parent_path + service.servicename + "/"
        self._store_json(service_path, dict(id=service.id, servicename=service.servicename))

        instances_path = service_path + self.CONST_INSTANCES_PATH
        self.zk.ensure_path(instances_path)
        for instance_item in service.instances:
            self.create_instance_path(instances_path, instance_item, service.servicename)

    def create_instance_path(self, parent_path, instance, service_name):
        """Create the node of one service instance.

        :param parent_path: path of the ``instances/`` node (ends with "/").
        :param instance: object exposing ``id``, ``mac``, ``manageip``,
            ``publicip``, ``publicgateway``, ``publicnetmask``,
            ``serviceip`` and ``status``.
        :param service_name: looked up in ``CONST_HOST_MAPPING`` to fill the
            ``user`` field; an unmapped name yields ``None`` (JSON ``null``)
            instead of raising ``KeyError``.
        :return: the instance path, or ``None`` when ``instance.id`` is
            ``None``.
        """
        if instance.id is None:
            print("instance.id is None")
            return
        instance_path = parent_path + instance.id
        instance_data = dict(
            id=instance.id,
            mac=instance.mac,
            manageip=instance.manageip,
            publicip=instance.publicip,
            publicgateway=instance.publicgateway,
            publicnetmask=instance.publicnetmask,
            serviceip=instance.serviceip,
            status=instance.status,
            user=self.CONST_HOST_MAPPING.get(service_name),
        )
        self._store_json(instance_path, instance_data)
        return instance_path

    def create_usergroup_path(self, parent_path, group):
        """Create ``<parent_path><groupname>/`` with its users and servers.

        Does nothing when *group* or its ``groupname`` is ``None``.
        """
        if group is None or group.groupname is None:
            return
        group_path = parent_path + group.groupname + "/"
        group_data = dict(id=group.id, groupname=group.groupname, description=group.description)
        self._store_json(group_path, group_data)

        users_path = group_path + self.CONST_USERS_PATH
        self.zk.ensure_path(users_path)
        for user_item in group.users:
            self.create_user_path(users_path, user_item)

        servers_path = group_path + self.CONST_SERVERS_PATH
        self.zk.ensure_path(servers_path)
        for server in group.servers:
            self.create_host_path(servers_path, server)

    def create_user_path(self, parent_path, user):
        """Create ``<parent_path><user.name>/`` and one node per host.

        Prints a message and returns when *user* or its ``name`` is ``None``.
        """
        if user is None or user.name is None:
            print("user is None")
            return
        user_path = parent_path + user.name + "/"
        self._store_json(user_path, dict(id=user.id, name=user.name, email=user.email))

        hosts_path = user_path + self.CONST_HOSTS_PATH
        self.zk.ensure_path(hosts_path)
        for host_item in user.hosts:
            self.create_host_path(hosts_path, host_item)

    def create_host_path(self, parent_path, host):
        """Create a host node named after ``host.mac``.

        :return: the host path, or ``None`` when *host* or its ``mac`` is
            ``None``.
        """
        if host is None or host.mac is None:
            return
        host_path = parent_path + host.mac
        host_data = dict(ip=host.ip, mac=host.mac, host_name=host.host_name, domain=host.domain)
        self._store_json(host_path, host_data)
        return host_path

    # ------------------------------------------------------------------
    # deletion
    # ------------------------------------------------------------------
    def delete_dso_path(self):
        """Remove the whole ``/dso`` tree and recreate an empty root."""
        self.zk.delete("/dso", recursive=True)
        self.zk.ensure_path("/dso")

    def delete_account_path(self, account_id):
        """Recursively delete the node of account *account_id*."""
        account_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH + account_id
        self.zk.delete(account_path, recursive=True)

    def delete_account_mapping_path(self, account_id):
        """Recursively delete the Ip2Vm, Ip2User and Mac2User nodes of an account."""
        mapping_root = self._mapping_root()
        for table in (self.CONST_IP2VMPATH, self.CONST_IP2USER_PATH, self.CONST_MAC2USER_PATH):
            self.zk.delete(mapping_root + table + account_id, recursive=True)

    @staticmethod
    def check_none(data):
        """Return the string ``"None"`` for ``None``, otherwise *data* itself."""
        return "None" if data is None else data

    # ------------------------------------------------------------------
    # lookup tables below /dso/Mapping/
    # ------------------------------------------------------------------
    def create_user2account_path(self, user2account):
        """Store *user2account* as the payload of the User2Account node."""
        self._store_json(self._mapping_root() + self.CONST_USER2ACCOUNT_PATH, user2account)

    def create_aid2aname_path(self, aid2aname):
        """Store *aid2aname* as the payload of the Aid2Aname node."""
        self.zk.ensure_path(self._mapping_root())
        self._store_json(self._mapping_root() + self.CONST_AID2ANAME_PATH, aid2aname)

    def _create_user_table(self, table_segment, account_info, table, none_message):
        """Write one ``<key> -> user data`` table for an account.

        Shared by the Ip2User and Mac2User writers; keys that are ``None``
        are reported with *none_message* and skipped.
        """
        table_path = self._mapping_root() + table_segment
        self.zk.ensure_path(table_path)
        account_path = table_path + account_info.id + "/"
        self._store_json(account_path, {"account_name": account_info.account_name})
        for key in table:
            if key is None:
                print(none_message)
                continue
            self._store_json(account_path + key, table[key])

    def create_ip2user_path(self, account_info, ip2user):
        """Publish the ``ip -> user data`` table of an account."""
        self.zk.ensure_path(self._mapping_root())
        self._create_user_table(self.CONST_IP2USER_PATH, account_info, ip2user, "Ip is None")

    def create_mac2user_path(self, account_info, mac2user):
        """Publish the ``mac -> user data`` table of an account."""
        self._create_user_table(self.CONST_MAC2USER_PATH, account_info, mac2user, "mac is None")

    def gen_mapping_pre_account(self, account_info, vpn_clients):
        """Build and publish the Ip2User, Mac2User and Ip2Vm tables of an account.

        :param account_info: object exposing ``id``, ``account_name``,
            ``groups`` and ``services``.
        :param vpn_clients: iterable of objects exposing ``user_name``,
            ``group`` and ``ip``; each client with an ip is added to the
            Ip2User table with any "/suffix" stripped from the address.
        """
        mapping_root = self._mapping_root()
        self.zk.ensure_path(mapping_root)
        self.zk.ensure_path(mapping_root + self.CONST_IP2VMPATH)
        self.zk.ensure_path(mapping_root + self.CONST_MAC2USER_PATH)

        ip2user = {}
        mac2user = {}
        for group in account_info.groups:
            for user in group.users:
                for host in user.hosts:
                    user_data = dict(
                        userid=user.id, username=user.name, useremail=user.email, groupname=group.groupname
                    )
                    if host.ip is not None:
                        ip2user[host.ip] = user_data
                    mac2user[host.mac] = user_data

        for vpn in vpn_clients:
            if vpn.ip is None:
                continue
            vpn_user_data = dict(userid="vpn_user", username=vpn.user_name, useremail="", groupname=vpn.group)
            ip2user[vpn.ip.split("/")[0]] = vpn_user_data

        self.create_ip2user_path(account_info, ip2user)
        self.create_mac2user_path(account_info, mac2user)
        # The User2Account table is not published at the moment.
        self.create_ip2vm_path(account_info)

    def gen_vm_path(self, vm_info):
        """Publish one VmInfo node per VM, named after its management ip.

        "/" in ``vm.manage_ip`` is replaced by "-" because "/" separates
        path components.
        """
        self.zk.ensure_path(self._mapping_root())
        vms_path = self._mapping_root() + self.CONST_VM_INFO_PATH
        self.zk.ensure_path(vms_path)
        for vm in vm_info:
            vm_node_path = vms_path + vm.manage_ip.replace("/", "-")
            self._store_json(vm_node_path, dict(id=vm.id, type=vm.type, status=vm.status))

    def create_mapping_accounts(self, accounts):
        """Publish the ``account id -> account name`` table."""
        aid2aname = {}
        for account in accounts:
            aid2aname[account.id] = account.account_name
        self.create_aid2aname_path(aid2aname)

    def _instance_summary_html(self, instance, service_name):
        """Render one instance as the HTML table used in the services summary."""
        rows = (
            ("manageip", instance.manageip),
            ("publicip", instance.publicip),
            ("publicgateway", instance.publicgateway),
            ("publicnetmask", instance.publicnetmask),
            ("serviceip", instance.serviceip),
            ("status", instance.status),
            # .get(): an unmapped service renders as "None" instead of
            # aborting the whole summary with a KeyError.
            ("user", self.CONST_HOST_MAPPING.get(service_name)),
        )
        html = '<table class="table">'
        for label, value in rows:
            html += "<tr><td>" + label + "</td><td>" + self.check_none(value) + "</td></tr>"
        return html + "</table>"

    def gen_services_summay(self, path, services):
        """Store an HTML summary of every service as the payload of *path*.

        The payload is a JSON object mapping each service name to the
        newline-terminated HTML tables of its instances.  *path* must
        already exist; it is only written, not created.
        """
        services_data = {}
        for service_item in services:
            instance_array = ""
            for instance in service_item.instances:
                instance_array += self._instance_summary_html(instance, service_item.servicename) + "\n"
            services_data[service_item.servicename] = instance_array
        self.zk.set(path, self._to_json_bytes(services_data))

    def create_ip2vm_path(self, account_info):
        """Publish the ``management ip -> instance`` table of an account.

        Nodes are written with ``create`` rather than ``ensure_path`` +
        ``set``, so an already existing account or ip node makes the call
        fail (kazoo raises for an existing node); remove stale data with
        :meth:`delete_account_mapping_path` first.  Instances without a
        management ip are skipped and any "/suffix" is stripped from the
        address used as node name.
        """
        path = self._mapping_root() + self.CONST_IP2VMPATH + account_info.id + "/"
        self.zk.create(path, self._to_json_bytes(dict(accountname=account_info.account_name)))
        for service_item in account_info.services:
            service_name = service_item.servicename
            host_name = account_info.id + "-" + service_name
            for instance in service_item.instances:
                if instance.manageip is None:
                    continue
                vm_path = path + instance.manageip.split("/")[0]
                instance_data = dict(
                    id=instance.id,
                    mac=instance.mac,
                    manageip=instance.manageip,
                    publicip=instance.publicip,
                    publicgateway=instance.publicgateway,
                    publicnetmask=instance.publicnetmask,
                    serviceip=instance.serviceip,
                    status=instance.status,
                    servicename=service_name,
                    hostname=host_name,
                )
                self.zk.create(vm_path, self._to_json_bytes(instance_data))

    def get_all_account(self):
        """Return the child names of the accounts node (the account ids)."""
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        return self.zk.get_children(accounts_path)

    def commit(self):
        """Commit the transaction opened in ``__init__``."""
        self.tran.commit()

    def stopZooK(self):
        """Stop the underlying kazoo client."""
        self.zk.stop()
class ZkSyncManager(object):
    """ZooKeeper-backed locks: short-lived ones and persistent ones.

    Short-lived locks use kazoo's ``Lock`` recipe through :meth:`lock`.
    Persistent locks are plain nodes below ``lock_path_prefix`` whose
    payload identifies the holder; they stay until released explicitly.
    """

    RETRIES = 2
    LOCK_TIMEOUT = 3

    # Explicit field indices: mixing "{0}" with bare "{}" makes str.format
    # raise ValueError, which used to mask the inconsistency being reported.
    _INCONSISTENT_DATA_MSG = 'Lock {0} has inconsistent data: {1}, expected {2}'

    def __init__(self, host='127.0.0.1:2181', lock_path_prefix='/mastermind/locks/'):
        """Connect to ZooKeeper.

        :param host: host string handed to ``KazooClient``.
        :param lock_path_prefix: prefix prepended to every lock id.
        :raises Exception: whatever ``KazooClient.start`` raises, after
            logging it.
        """
        self.client = KazooClient(host, timeout=3)
        logger.info(
            'Connecting to zookeeper host {}, lock_path_prefix: {}'.format(
                host, lock_path_prefix))
        try:
            self.client.start()
        except Exception as e:
            logger.error(e)
            raise
        self._retry = KazooRetry(max_tries=self.RETRIES)
        self.lock_path_prefix = lock_path_prefix

    @contextmanager
    def lock(self, lockid, blocking=True, timeout=LOCK_TIMEOUT):
        """Context manager holding the lock *lockid* for the ``with`` body.

        :param lockid: lock name, appended to ``lock_path_prefix``.
        :param blocking: passed to ``Lock.acquire``.
        :param timeout: passed to ``Lock.acquire`` (seconds).
        :raises LockAlreadyAcquiredError: the lock could not be acquired.
        :raises LockFailedError: acquisition timed out.
        :raises LockError: any other lock error, after logging it.

        NOTE(review): the ``except`` clauses also cover the ``with`` body,
        so a lock exception raised there is reported as an acquisition
        failure.
        """
        lock = Lock(self.client, self.lock_path_prefix + lockid)
        try:
            acquired = lock.acquire(blocking=blocking, timeout=timeout)
            logger.debug('Lock {0} acquired: {1}'.format(lockid, acquired))
            if not acquired:
                # TODO: Change exception time or set all required parameters for
                # this type of exception
                raise LockAlreadyAcquiredError(lock_id=lockid)
            yield
        except LockTimeout:
            logger.info(
                'Failed to acquire lock {} due to timeout ({} seconds)'.format(
                    lockid, timeout))
            raise LockFailedError(lock_id=lockid)
        except LockAlreadyAcquiredError:
            raise
        except LockError as e:
            logger.error('Failed to acquire lock {0}: {1}\n{2}'.format(
                lockid, e, traceback.format_exc()))
            raise
        finally:
            lock.release()

    def persistent_locks_acquire(self, locks, data=''):
        """Create all persistent *locks* in one transaction, with retries.

        :param locks: sequence of lock ids.
        :param data: payload written to every lock node (the holder id).
        :return: ``True`` on success.
        :raises LockError: retries were exhausted or kazoo reported an error.
        """
        try:
            retry = self._retry.copy()
            result = retry(self._inner_persistent_locks_acquire,
                           locks=locks, data=data)
        except RetryFailedError:
            raise LockError(
                'Failed to acquire persistent locks {} after several retries'.
                format(locks))
        except KazooException as e:
            logger.error(
                'Failed to fetch persistent locks {0}: {1}\n{2}'.format(
                    locks, e, traceback.format_exc()))
            raise LockError
        return result

    def _inner_persistent_locks_acquire(self, locks, data):
        """Single attempt at creating every lock node in one transaction.

        :raises LockAlreadyAcquiredError: at least one node already exists;
            the exception names a lock held with different *data* when there
            is one, otherwise the first existing lock.
        :raises LockError: the transaction failed for another reason.
        """
        ensured_paths = set()
        tr = self.client.transaction()
        for lockid in locks:
            path = self.lock_path_prefix + lockid
            parts = path.rsplit('/', 1)
            # Parent nodes must exist before the transaction runs; ensure
            # each distinct parent only once.
            if len(parts) == 2 and parts[0] not in ensured_paths:
                self.client.ensure_path(parts[0])
                ensured_paths.add(parts[0])
            tr.create(path, data)

        failed = False
        failed_locks = []
        result = tr.commit()
        for lockid, res in zip(locks, result):
            if isinstance(res, ZookeeperError):
                failed = True
            if isinstance(res, NodeExistsError):
                failed_locks.append(lockid)

        if failed_locks:
            holders = []
            for f in failed_locks:
                # TODO: fetch all holders with 1 transaction request
                holders.append((f, self.client.get(self.lock_path_prefix + f)))
            foreign_holders = [(l, h) for l, h in holders if h[0] != data]
            failed_lock, holder_resp = (
                foreign_holders[0] if foreign_holders else holders[0])
            holder = holder_resp[0]
            holders_ids = list({h[0] for _, h in holders})
            logger.warning('Persistent lock {0} is already set by {1}'.format(
                failed_lock, holder))
            raise LockAlreadyAcquiredError(
                'Lock for {0} is already acquired by job {1}'.format(
                    failed_lock, holder),
                lock_id=failed_lock,
                holder_id=holder,
                lock_ids=failed_locks,
                holders_ids=holders_ids)
        elif failed:
            logger.error(
                'Failed to set persistent locks {0}, result: {1}'.format(
                    locks, result))
            raise LockError
        return True

    def get_children_locks(self, lock_prefix):
        """Return the ids of the locks directly below *lock_prefix*.

        :raises LockError: retries were exhausted.
        """
        try:
            retry = self._retry.copy()
            result = retry(self.__inner_get_children_locks, lock_prefix)
        except RetryFailedError:
            raise LockError('Failed to get fetch children locks for {}'.format(
                lock_prefix))
        return result

    def __inner_get_children_locks(self, lock_prefix):
        """Single attempt at listing the lock ids below *lock_prefix*."""
        full_path = self.lock_path_prefix + lock_prefix
        self.client.ensure_path(os.path.normpath(full_path))
        result = self.client.get_children(full_path)
        return ['{0}{1}'.format(lock_prefix, lock) for lock in result]

    def persistent_locks_release(self, locks, check=''):
        """Delete the persistent *locks*, with retries.

        :param locks: iterable of lock ids.
        :param check: when non-empty, every lock's payload must equal it.
        :return: ``True`` on success.
        :raises LockError: retries were exhausted or kazoo reported an error.
        """
        try:
            retry = self._retry.copy()
            result = retry(self.__inner_persistent_locks_release,
                           locks=locks, check=check)
        except RetryFailedError:
            raise LockError
        except KazooException as e:
            logger.error(
                'Failed to remove persistent locks {0}: {1}\n{2}'.format(
                    locks, e, traceback.format_exc()))
            raise LockError
        return result

    def __inner_persistent_locks_release(self, locks, check):
        """Single attempt at deleting every lock node.

        Locks that are already gone are logged and skipped.

        :raises InconsistentLockError: *check* is set and a lock's payload
            differs from it.
        """
        for lockid in locks:
            try:
                if check:
                    data = self.client.get(self.lock_path_prefix + lockid)
                    if data[0] != check:
                        logger.error(self._INCONSISTENT_DATA_MSG.format(
                            lockid, data[0], check))
                        raise InconsistentLockError(lock_id=lockid,
                                                    holder_id=data[0])
                self.client.delete(self.lock_path_prefix + lockid)
            except NoNodeError:
                logger.warning(
                    'Persistent lock {0} is already removed'.format(lockid))
        return True
class Scheduler:
    """Distribute tasks to agents through a ZooKeeper tree.

    Nodes used, all below ``/<zk_root>/``: ``signal/<task>`` (watched for
    new tasks and changes), ``tasks/<task>`` (JSON task definition),
    ``tasks/<task>/targets/<target>`` (one status per target),
    ``agents/<target>/tasks/<task>`` (copy handed to an agent) and
    ``callback/<task>`` (marks a task as done with scheduling).
    """

    def __init__(self, zk_hosts, zk_root):
        """
        :param zk_hosts: host string handed to ``KazooClient``.
        :param zk_root: name of the root node, without slashes around it.
        """
        self.zk = KazooClient(zk_hosts)
        self.root = zk_root
        self.tasks = set()
        self.event = threading.Event()

    def get_targets(self, task_id):
        """Return ``{target: status}`` for every target of *task_id*."""
        node = '/{}/tasks/{}/targets'.format(self.root, task_id)
        result = {}
        for target in self.zk.get_children(node):
            status, _ = self.zk.get('{}/{}'.format(node, target))
            result[target] = status.decode()
        return result

    def callback(self, task_id):
        """Create the callback node of *task_id*."""
        self.zk.ensure_path('/{}/callback/{}'.format(self.root, task_id))

    def copy_task(self, targets, task):
        """Hand *task* to each target and set that target's status to ``W``.

        Both writes of one target go through a single transaction.
        NOTE(review): the commit results are not inspected, so a failed
        transaction goes unnoticed here.
        """
        payload = json.dumps(task).encode()
        for target in targets:
            tx = self.zk.transaction()
            tx.create(
                '/{}/agents/{}/tasks/{}'.format(self.root, target, task['id']),
                payload)
            tx.set_data(
                '/{}/tasks/{}/targets/{}'.format(self.root, task['id'], target),
                b'W')
            tx.commit()

    def schedule(self, task_id):
        """Advance *task_id* by one scheduling step under its lock.

        The task is marked done (callback node) when it has no targets, when
        the share of ``F`` targets exceeds the task's ``fail_rate``, or when
        every target is in ``F``/``S``/``K``.  Otherwise up to ``parallel``
        targets are kept in ``W``/``R`` by handing the task to more of them.
        The status letters are presumably Failed/Succeeded/Killed/Waiting/
        Running - confirm against the agent code.  A lock timeout skips the
        step silently.
        """
        node = '/{}/tasks/{}'.format(self.root, task_id)
        lock_node = '{}/lock'.format(node)
        self.zk.ensure_path(lock_node)
        lock = Lock(self.zk, lock_node)
        try:
            if lock.acquire(timeout=1):
                data, _ = self.zk.get(node)
                task = json.loads(data.decode())
                p = task.get('parallel', 1)
                rate = task.get('fail_rate', 0)
                targets = self.get_targets(task_id)
                if not targets:
                    # Nothing to run on: avoid dividing by zero below and
                    # treat the task as finished, which is what the
                    # "all targets finished" test would conclude as well.
                    return self.callback(task_id)
                if count(targets, ('F', )) / len(targets) > rate:
                    return self.callback(task_id)
                if count(targets, ('F', 'S', 'K')) == len(targets):
                    return self.callback(task_id)
                wait_schedule = choose(targets, p - count(targets, ('W', 'R')))
                self.copy_task(wait_schedule, task)
        except LockTimeout:
            pass
        finally:
            lock.release()

    def _exist_task_watcher(self, task_id):
        """Return the data-watch callback for the signal node of *task_id*.

        The task id is bound positionally: the watch passes the node data
        and stat as positional arguments, which would collide with a
        keyword-bound ``task_id`` and raise ``TypeError``.
        """
        return partial(self.watch_exist_task, task_id)

    def watch_new_task(self, tasks):
        """Children-watch callback: schedule and watch every new task.

        :param tasks: current children of the signal node.
        :return: ``False`` once :meth:`shutdown` was called, which stops the
            watch.
        """
        for task_id in set(tasks).difference(self.tasks):
            self.schedule(task_id)
            DataWatch(self.zk, '/{}/signal/{}'.format(self.root, task_id),
                      self._exist_task_watcher(task_id))
        self.tasks = tasks
        return not self.event.is_set()

    def watch_exist_task(self, task_id, *args):
        """Data-watch callback: reschedule *task_id* until it has a callback node.

        :param args: watch arguments (node data, stat, ...), unused.
        :return: ``False`` to stop watching once the callback node exists,
            ``True`` otherwise.
        """
        if self.zk.exists('/{}/callback/{}'.format(self.root, task_id)):
            return False
        self.schedule(task_id)
        return True

    def watch(self):
        """Start watching the signal node for new tasks."""
        ChildrenWatch(self.zk, '/{}/signal'.format(self.root),
                      self.watch_new_task)

    def start(self):
        """Connect, start watching and block until :meth:`shutdown`."""
        self.zk.start()
        self.watch()
        self.event.wait()

    def shutdown(self):
        """Unblock :meth:`start` and close the ZooKeeper connection."""
        self.event.set()
        self.zk.close()
class ResManager(object):
    '''
    Resource management module.

    Tracks load-test hosts in ZooKeeper below ``<CONF.host_path>/list`` (one
    node per host ip with one child node per host attribute) and allocates
    or releases agents on them.  Changes to the host nodes are made while
    holding the host lock.
    '''
    def __init__(self):
        self._zk_client = KazooClient(hosts=CONF.zk_address)
        self._zk_client.start()
        CONF.log.debug("zk client started, zk_address = %s"%(CONF.zk_address))
        self.hostRootPath = os.path.join(CONF.host_path,"list")
        # "list" has to be part of the joined path; it used to be passed as
        # the second (ACL) argument of ensure_path by mistake.
        self._zk_client.ensure_path(self.hostRootPath)
        self.zkHostLock = ZooKeeperLock(CONF.zk_address,"host_lock",os.path.join(CONF.host_path,"lock"))

    def allocate_agent(self, taskObj, count, agentRes, agentMaxQps):
        '''
        Allocate load-test agents for the given task.

        Agents are spread breadth-first: each round places one agent on
        every host that currently has the most available resource.

        taskObj: task object; task_id, query_type and add_agent() are used
        count: number of agents to allocate
        agentRes: resource units one agent occupies
        agentMaxQps: forwarded to taskObj.add_agent()

        Raises Exception when the hosts cannot hold all requested agents;
        nothing is written to zk in that case.
        '''
        CONF.log.info(\
            "[Enter]allocate_agent. taskObj=%s, count=%s, agentRes=%s, agentMaxQps=%s"\
            %(taskObj.__dict__, count, agentRes, agentMaxQps))
        taskId = taskObj.task_id
        queryType = taskObj.query_type
        # Constraint: one task only contains a single type of agent, so every
        # agent of the task occupies the same amount of resource.

        # Host list without hosts in error state.  read_hosts() takes and
        # releases the host lock itself, so it runs before we take the lock.
        hostList = self.read_hosts(nonerr=True)
        # Acquire before the try block: a failed acquire must not end up
        # releasing a lock that was never held.
        self._acquire_host_lock()
        try:
            transaction = self._zk_client.transaction()
            # Agents allocated by this call.
            agentAllocateList = []
            # lastSeq is read once and written back after all allocations.
            lastSeq = self._getLastSeq()
            while count > 0:
                mostResourcefulHostList = self._select_most_resourceful_hosts(hostList)
                if not mostResourcefulHostList:
                    # No usable host at all.
                    break
                # Even the best host cannot hold one more agent: give up.
                if int(mostResourcefulHostList[0].availableRes) < agentRes:
                    CONF.log.debug("最多resource的host依然无法满足agentRes则分配失败(availableRes=%s,agnetRes=%s)"\
                        %(int(mostResourcefulHostList[0].availableRes),agentRes))
                    break
                for hostObj in mostResourcefulHostList:
                    hostIp = hostObj.ip
                    availableRes = int(hostObj.availableRes)
                    # The host has enough resource: place one agent on it.
                    if availableRes >= agentRes:
                        lastSeq += 1
                        agentId = "%s_%s"%(hostIp,lastSeq)
                        CONF.log.info("allocate one agent on %s. (availableRes=%s, agentRes=%s)"%(hostIp,availableRes,agentRes))
                        agentAllocateList.append({"hostIp":hostIp,
                                                  "agentId":agentId,
                                                  "agentRes":agentRes,
                                                  "taskId":taskId,
                                                  "queryType":queryType})
                        availableRes -= agentRes
                        hostObj.availableRes = str(availableRes)
                        hostObj.status = "occupied"
                        count -= 1
                        # Queue the zk update of availableRes and status.
                        availableResPath = os.path.join(self.hostRootPath,hostIp,"availableRes")
                        statusPath = os.path.join(self.hostRootPath,hostIp,"status")
                        self._zk_client.ensure_path(availableResPath)
                        self._zk_client.ensure_path(statusPath)
                        transaction.set_data(availableResPath,hostObj.availableRes)
                        transaction.set_data(statusPath,hostObj.status)
                    # All requested agents are placed.
                    if count == 0:
                        break
            # Not enough resource.
            if count > 0:
                raise Exception("资源不足(taskObj = %s, count = %d)"%(taskObj.__dict__, count))
            # Allocation succeeded: queue the zk update of lastSeq.
            lastSeqPath = os.path.join(CONF.host_path,"lastSeq")
            self._zk_client.ensure_path(lastSeqPath)
            transaction.set_data(lastSeqPath,str(lastSeq))
        finally:
            self._release_host_lock()
        # Let the task management module record the allocated resources.
        for agentObj in agentAllocateList:
            taskObj.add_agent(agentObj.get("hostIp"),agentObj.get("agentId"),agentMaxQps,agentRes)
        # Run the transaction to update the host nodes in zk.
        # NOTE(review): the commit happens after the host lock is released.
        transaction.commit()
        # Create an AgentCtl per agent to start it.
        for agentObj in agentAllocateList:
            agentCtl = AgentCtl(agentObj.get("hostIp"),agentObj.get("agentId"),agentObj.get("taskId"), agentObj.get("queryType"))
            agentCtl.create()
        CONF.log.debug("[Exit]allocate_agent. taskObj=%s, count=%s, agentRes=%s, agentMaxQps=%s"\
            %(taskObj.__dict__, count, agentRes, agentMaxQps))

    def _select_most_resourceful_hosts(self,hostlist):
        '''
        utility function. Select the hosts with the most availableRes.
        Input:
            hostlist: list of hostObj
        Output:
            list of the hostObj having the largest availableRes (a list,
            because several hosts may share that value); empty for an empty
            hostlist
        '''
        if not hostlist:
            return []
        CONF.log.debug("[Enter] _select_most_resourceful_hosts(), hostlist[0] = %s(type=%s,len=%d)"%(hostlist[0].__dict__,type(hostlist),len(hostlist)))
        return self._pick_max_available(hostlist)

    @staticmethod
    def _pick_max_available(hostlist):
        '''
        Return the hosts of hostlist whose availableRes equals the maximum,
        in their original relative order.
        '''
        if not hostlist:
            return []
        # sorted() is stable, so hosts with equal availableRes keep their
        # relative order.
        sortedHostList = sorted(hostlist,key=lambda host:int(host.availableRes), reverse=True)
        maxAvailableRes = int(sortedHostList[0].availableRes)
        return [hostObj for hostObj in sortedHostList if int(hostObj.availableRes) == maxAvailableRes]

    def _getLastSeq(self):
        '''
        Return the last used agent sequence number stored in zk.
        '''
        # NOTE(review): this path is hard-coded while allocate_agent() writes
        # the value to os.path.join(CONF.host_path,"lastSeq"); the two only
        # match when CONF.host_path is "/acp/host" - confirm.
        path = "/acp/host/lastSeq"
        result = self._zk_client.get(path)[0]
        return int(result)

    def getHostStatus(self,hostIp):
        '''
        Return the raw payload of the host's "status" node.
        '''
        status_path = os.path.join(self.hostRootPath,hostIp,"status")
        return self._zk_client.get(status_path)[0]

    def release_agents_for_task(self, taskObj, count):
        '''
        Release the given number of agents of a task.
        taskObj: object describing the task
        count: number of agents to release

        Raises Exception when count exceeds the number of agents of the task.
        '''
        CONF.log.debug("[Enter]release_agent_for_task: %s"%(taskObj.__dict__))
        agentsForTask = taskObj.get_all_agent()
        if count > len(agentsForTask):
            raise Exception("count > number of agents for task: %s"%(str(taskObj)))
        # Release the first count agents.
        for agent in agentsForTask[:count]:
            self.release_agent(agent)
        CONF.log.debug("[Exit]release_agent_for_task: %s"%(taskObj.__dict__))

    def release_all_agents(self, taskObj):
        '''
        Release every agent of the task.
        '''
        for agent in taskObj.get_all_agent():
            self.release_agent(agent)

    def release_agent(self, agentObj):
        '''
        Release the given agent: stop it, give its resource back to the host
        and drop it from the task manager.
        '''
        CONF.log.debug("[Enter]release_agent: %s"%(agentObj.__dict__))
        agentCtl = AgentCtl(agentObj.host,agentObj.agent_id,agentObj.task_id,agentObj.query_type)
        # Tell the agent daemon to stop the agent.
        agentCtl.delete()
        # Take the lock, then update the host node (availableRes).
        self._acquire_host_lock()
        try:
            availableResPath = os.path.join(self.hostRootPath,agentCtl.hostIp,"availableRes")
            currentValue = int(self._zk_client.get(availableResPath)[0])
            agentRes = int(agentObj.resource_num)
            currentValue += agentRes
            transaction = self._zk_client.transaction()
            totalResPath = os.path.join(self.hostRootPath,agentCtl.hostIp,"totalRes")
            totalRes = int(self._zk_client.get(totalResPath)[0])
            transaction.set_data(availableResPath,str(currentValue))
            # The host becomes idle once all of its resource is free again.
            if currentValue == totalRes:
                statusPath = os.path.join(self.hostRootPath,agentCtl.hostIp,"status")
                CONF.log.debug("release_agent() set host status to idle")
                transaction.set_data(statusPath,"idle")
            # Tell the task manager to drop the agent.
            Agent.delete_agent(agentObj)
            transaction.commit()
            CONF.log.debug("[Exit]release_agent(), reset availableRes data (path=%s, value=%s)"%(availableResPath,str(currentValue)))
        finally:
            self._release_host_lock()

    def read_hosts(self,filter=None,nonerr=False):
        '''
        Read the hosts stored in zk.
        filter: optional dict; only hosts whose attributes contain every
                key/value pair of it are returned
        nonerr: when True, hosts whose status is "error" are skipped
        Returns a list of Host objects (possibly empty) carrying one
        attribute per child node of the host node.
        '''
        CONF.log.debug("[Enter]read_hosts()")
        # Acquire before the try block: a failed acquire must not end up
        # releasing a lock that was never held.
        self._acquire_host_lock()
        try:
            hostIpList = self._zk_client.get_children(self.hostRootPath)
            ret = []
            for hostIp in hostIpList:
                hostObj = Host(hostIp)
                hostpath = os.path.join(self.hostRootPath,hostIp)
                hostAttrList = self._zk_client.get_children(hostpath)
                for attr in hostAttrList:
                    attrpath = os.path.join(self.hostRootPath,hostIp,attr)
                    value = self._zk_client.get(attrpath)[0]
                    setattr(hostObj,attr,value)
                    CONF.log.debug("setattr: %s=%s"%(attr,value))
                # Skip hosts in error state.
                if nonerr and hostObj.status == "error":
                    continue
                # Keep the host when it satisfies the filter.
                if filter is None or set(filter.items()).issubset(set(hostObj.__dict__.items())):
                    ret.append(hostObj)
            if ret:
                CONF.log.debug("[Exit]read_hosts(), ret = %s, len=%d, ret[0] = %s"%(ret,len(ret),ret[0].__dict__))
            else:
                # Indexing ret[0] for the log line would raise on an empty result.
                CONF.log.debug("[Exit]read_hosts(), ret = %s, len=%d"%(ret,len(ret)))
            return ret
        finally:
            self._release_host_lock()

    def add_hosts(self,hostList):
        '''
        Create the zk nodes of new hosts.
        hostList: list of objects with an "ip" attribute; every instance
        attribute becomes a child node holding str(value), e.g.
            ip = "1.1.1.1", totalRes = 4, availableRes = 4, status = idle
        Raises Exception when a host node already exists; hosts handled
        before it stay created.
        '''
        CONF.log.info("[Enter]add_hosts()")
        self._acquire_host_lock()
        try:
            for hostObj in hostList:
                # Create the host root node.
                hostPath = os.path.join(self.hostRootPath,hostObj.ip)
                if self._zk_client.exists(hostPath):
                    raise Exception("The zk path \"%s\" already exists"%(hostPath))
                transaction = self._zk_client.transaction()
                transaction.create(hostPath)
                # Create the host attribute nodes.
                for k,v in hostObj.__dict__.items():
                    keyPath = os.path.join(hostPath,k)
                    transaction.create(keyPath)
                    transaction.set_data(keyPath,str(v))
                transaction.commit()
            CONF.log.info("[Exit]add_hosts()")
        finally:
            self._release_host_lock()

    def del_hosts(self,hostList):
        '''
        Recursively delete the zk nodes of the given hosts.
        Raises Exception when a host node does not exist; hosts handled
        before it stay deleted.
        '''
        CONF.log.info("[Enter]del_hosts()")
        self._acquire_host_lock()
        try:
            for hostObj in hostList:
                hostPath = os.path.join(self.hostRootPath,hostObj.ip)
                if not self._zk_client.exists(hostPath):
                    raise Exception("the zk path \"%s\" does not exist."%(hostPath))
                self._zk_client.delete(hostPath,recursive=True)
            CONF.log.info("[Exit]del_hosts()")
        finally:
            self._release_host_lock()

    def _acquire_host_lock(self):
        '''
        Acquire the host lock; raises Exception when that fails.
        '''
        ret = self.zkHostLock.acquire()
        if not ret:
            raise Exception("acquire host lock failed.")

    def _release_host_lock(self):
        '''
        Release the host lock.
        '''
        self.zkHostLock.release()

    def stopZkClient(self):
        '''
        Stop the zk client; calling it again is a no-op.
        '''
        CONF.log.debug("[Enter]stopZkClient()")
        if self._zk_client is not None:
            self._zk_client.stop()
            self._zk_client = None

    def __del__(self):
        # The client is deliberately not stopped here; call stopZkClient().
        pass
class ArcusZooKeeper:
    """
    ZooKeeper helper for Arcus

    Manages the ``/arcus`` tree: ``cache_list/<service code>`` (cluster
    configuration as JSON), ``client_list/<service code>`` and
    ``cache_server_mapping/<ip>:<port>/<service code>``.
    """

    def __init__(self, hostports, timeout):
        """
        :param hostports: host string handed to ``KazooClient``.
        :param timeout: stored on the instance only.
        """
        self.hostports = hostports
        # NOTE(review): the timeout is kept but never passed to KazooClient.
        self.timeout = timeout
        self.zk = KazooClient(hosts=hostports, read_only=False)

    def start(self):
        """Start the kazoo client."""
        self.zk.start()

    def stop(self):
        """Stop the kazoo client."""
        self.zk.stop()

    def init_structure(self):
        """Create the empty ``/arcus`` tree in one transaction.

        :return: ``True`` on success; ``False`` when ``/arcus`` already
            exists or when any operation of the transaction failed.
        """
        if self.zk.exists('/arcus'):
            print('init_arcus_structure: fail (/arcus exists)')
            return False

        tx = self.zk.transaction()
        tx.create('/arcus', b'')
        tx.create('/arcus/cache_list', b'')
        tx.create('/arcus/client_list', b'')
        tx.create('/arcus/cache_server_mapping', b'')
        results = tx.commit()
        # commit() returns one result per operation and reports a failed
        # operation as an exception instance, so a non-empty list alone does
        # not mean failure.
        if any(isinstance(each, Exception) for each in results):
            print(results)
            return False

        print('init_structure: success')
        return True

    def drop_structure(self):
        """Recursively delete the ``/arcus`` tree."""
        self.zk.delete('/arcus', recursive=True)
        print('delete_structure: success')

    def get_structure(self):
        """Return the names of the children of ``/arcus``."""
        return self.zk.get_children('/arcus')

    def get_mapping_for_service(self, service_code):
        """Return the mapping node paths that belong to *service_code*.

        A mapping node belongs to the service when its first child is named
        like the service code.
        """
        mapping = '/arcus/cache_server_mapping'
        result = []
        for ipport in self.zk.get_children(mapping):
            node = '%s/%s' % (mapping, ipport)
            codes = self.zk.get_children(node)
            if codes and codes[0] == service_code:
                result.append(node)
        return result

    def get_config_for_service(self, service_code):
        """Return ``(parsed config, raw data, stat)`` of the service's cache_list node."""
        cache_list = '/arcus/cache_list/%s' % service_code
        data, stat = self.zk.get(cache_list)
        return json.loads(data), data, stat

    def update_service_code(self, cluster):
        """Create or replace a service in one transaction.

        The cache_list node gets *cluster* as JSON, the client_list node is
        created when missing and the service's mapping nodes are replaced by
        one node per server.  *cluster* is modified in place: ``created`` is
        set (or carried over) and ``modified`` is set on update.  A server's
        configuration is the cluster-wide ``config`` overridden by the
        server's own ``config``; servers without any configuration are
        skipped.

        Errors are printed with their traceback and not raised.

        :param cluster: dict with ``serviceCode``, ``servers`` and
            optionally ``config``.
        """
        service_code = cluster['serviceCode']
        cache_list = '/arcus/cache_list/%s' % service_code
        client_list = '/arcus/client_list/%s' % service_code
        mapping = '/arcus/cache_server_mapping'

        try:
            delete_list = self.get_mapping_for_service(service_code)

            # 0. Create a transaction
            tx = self.zk.transaction()

            # 1. Cache list
            if self.zk.exists(cache_list):
                existing, _, _ = self.get_config_for_service(service_code)
                cluster['created'] = existing.get('created')
                cluster['modified'] = str(datetime.datetime.now())
                tx.set_data(cache_list, json.dumps(cluster))
            else:
                cluster['created'] = str(datetime.datetime.now())
                tx.create(cache_list, json.dumps(cluster))

            # 2. Client list
            if not self.zk.exists(client_list):
                tx.create(client_list, b'')

            # 3. Mapping
            for each in delete_list:
                tx.delete('%s/%s' % (each, service_code))
                tx.delete(each)

            global_config = cluster.get('config', {})
            for server in cluster['servers']:
                # Per-node settings override the cluster-wide ones.
                config = dict(global_config)
                config.update(server.get('config', {}))
                if len(config) == 0:
                    print('update_service_code: config not found for {0}'.format(server))
                    continue

                map_ip = '%s/%s:%s' % (mapping, server['ip'], config['port'])
                map_code = '%s/%s' % (map_ip, service_code)
                tx.create(map_ip, json.dumps(config))
                tx.create(map_code, b'')

            # 4. Commit
            results = tx.commit()
            print(results)
        except Exception:
            traceback.print_exc()

    def delete_service_code(self, cluster):
        """Delete a service and its mapping nodes in one transaction.

        Errors are printed with their traceback and not raised.

        :param cluster: dict with ``serviceCode``.
        """
        service_code = cluster['serviceCode']
        cache_list = '/arcus/cache_list/%s' % service_code
        client_list = '/arcus/client_list/%s' % service_code

        try:
            delete_list = self.get_mapping_for_service(service_code)

            # 0. Create a transaction
            tx = self.zk.transaction()

            # 1. Cache list
            tx.delete(cache_list)

            # 2. Client list
            tx.delete(client_list)

            # 3. Mapping
            for each in delete_list:
                tx.delete('%s/%s' % (each, service_code))
                tx.delete(each)

            # 4. Commit
            results = tx.commit()
            print(results)
        except Exception:
            traceback.print_exc()

    def list_all_service_code(self):
        """Return :meth:`list_service_code` for every service.

        Returns ``None`` after printing the traceback when listing the
        services fails.
        """
        try:
            service_codes = self.zk.get_children('/arcus/cache_list')
            return [self.list_service_code(each) for each in service_codes]
        except Exception:
            traceback.print_exc()

    def list_service_code(self, service_code):
        """Describe one service.

        The configured servers (mapping nodes) are compared with the
        children of the service's cache_list node, of which the part before
        the first "-" is used.

        :return: dict with ``serviceCode``, ``config``, ``online``,
            ``offline``, ``undefined`` (sorted lists), ``created`` and
            ``modified``; ``None`` after printing the traceback on error.
        """
        cache_list = '/arcus/cache_list/%s' % service_code

        try:
            data, _ = self.zk.get(cache_list)
            static_list = self.get_mapping_for_service(service_code)
            current_list = self.zk.get_children(cache_list)

            # get clusterConfig
            cluster = json.loads(data)

            # get clusterStatus
            static_set = {each.split('/')[-1] for each in static_list}
            current_set = {each.split('-')[0] for each in current_list}
            offline = static_set - current_set
            online = static_set - offline
            undefined = current_set - static_set

            return {
                'serviceCode': service_code,
                'config': cluster,
                'online': sorted(online),
                'offline': sorted(offline),
                'undefined': sorted(undefined),
                'created': cluster.get('created'),
                'modified': cluster.get('modified'),
            }
        except Exception:
            traceback.print_exc()
class TrainCBox(object):
    """A training "box" that coordinates knowledge-base training via ZooKeeper.

    Every box owns one ZooKeeper node at ``/<type>/<vm name>/<type><name>``
    whose data is a small dict literal such as ``{"Target":"Null"}``.  A
    ``Target`` of ``'Null'`` marks the box as idle; any other value is the
    task assigned to it, in the form ``"<kb>_<suffix>"``.

    Task status values are kept in MySQL through the project's ``mymysql``
    helpers.  Going by the log messages in this class: 0 = untrained,
    3 = assigned to a box, 1 = training, 2 = trained.

    SECURITY NOTE(review): node payloads are decoded with ``eval`` (see
    ``_decode_node``).  Anyone able to write to these ZooKeeper nodes can run
    arbitrary code in this process.  Replacing it with ``ast.literal_eval`` or
    ``json.loads`` should be considered once all writers are confirmed to
    emit plain literals.
    """

    def __init__(self, type, name):
        """Record the box identity and derive its ZooKeeper paths.

        :param type: box type prefix (``'A'`` boxes train, see ``takeMinAMaster``).
        :param name: numeric box id; must be convertible with ``int()``.
        """
        self._Type = type
        self._Name = "%s%s" % (type, str(name))
        self._VName = HOST['name']
        self._Path = "/%s/%s/%s" % (self._Type, self._VName, self._Name)
        self._ZKHost = ZKSERVERS['hosts']
        self._IP = HOST['ip']
        self._Port = HOST['port']
        self._ID = int(name)
        self._ZK = None
        self._Chatbot = None
        self._Current_mina_master = ''
        self._Master_path = '/MinAMaster/%s' % self._VName
        self._MonitorRunning = False
        self._MinARunning = False
        self._Conn = None
        logger.debug('create a %s box named %s in VM %s.' %
                     (self._Type, self._Name, self._VName))

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _decode_node(data):
        """Turn raw node bytes into the dict they spell out.

        SECURITY: this uses ``eval`` exactly as the original inline code did;
        see the class docstring.
        """
        return eval(data.decode("utf-8"))

    def _node_exists_error(self):
        """Return the exception class raised when a node already exists.

        The client object is asked first (a wrapper may expose the class as an
        attribute); otherwise kazoo's own exception is used.
        """
        exc = getattr(self._ZK, 'NodeExistsError', None)
        if exc is None:
            from kazoo.exceptions import NodeExistsError as exc
        return exc

    # ------------------------------------------------------------------
    # Connection handling
    # ------------------------------------------------------------------
    def connectZK(self):
        """Create (but do not start) a new KazooClient for this box."""
        self._ZK = KazooClient(hosts=self._ZKHost)
        logger.info('%s is connecting ZK server.' % self._Path)

    def getType(self):
        return self._Type

    def getName(self):
        return self._Name

    def getVName(self):
        return self._VName

    def setZK(self, zk):
        self._ZK = zk

    def getZK(self):
        return self._ZK

    def startZK(self):
        """Start the ZooKeeper client connection."""
        self._ZK.start()
        logger.debug(
            'start one connection with ZK server by a %s box named %s in VM %s'
            % (self._Type, self._Name, self._VName))

    def stopZK(self):
        """Stop the ZooKeeper client connection."""
        self._ZK.stop()
        logger.debug(
            'stop connection with ZK server by a %s box named %s in VM %s' %
            (self._Type, self._Name, self._VName))

    def addZKListener(self):
        """Register a connection-state listener on the client.

        LOST restarts the client, SUSPENDED rebuilds and restarts it, and any
        other state (re)installs the data watch via ``startMonitor``.
        """

        def my_listenser(state):
            if state == KazooState.LOST:
                self.startZK()
            elif state == KazooState.SUSPENDED:
                self.connectZK()
                self.startZK()
            else:
                self.startMonitor()

        self._ZK.add_listener(my_listenser)

    # ------------------------------------------------------------------
    # Training-task monitoring (A boxes)
    # ------------------------------------------------------------------
    def startMonitor(self):
        """Install a data watch on this box's node that runs assigned tasks."""

        # self._MonitorRunning = True
        # Subscribe to node data changes.
        @self._ZK.DataWatch(self._Path)
        def watch_node_data_change(data, stat, path):
            # Parse the node data, move the task through its MySQL status
            # values and run the actual chatterbot training.
            if data:
                temp_str = self._decode_node(data)['Target']
                if self._Conn is None:
                    self._Conn = mymysql.myconnect(KBDATABASES)
                kb_id = temp_str.split("_")[0]
                kb_tag = temp_str.split("_")[-1]
                param = (kb_id, kb_tag, 0)
                temp_status = mymysql.myselectstatus(self._Conn, param)
                # logger.error(temp_status)
                if operator.ne(temp_str, 'Null') and temp_status[0] == 3:
                    logger.info('Watch one node %s with data %s is not Null' %
                                (self._Path, temp_str))
                    param = (1, kb_id, kb_tag, 3)
                    paramkg = (1, kb_id, kb_tag, 0)
                    logger.debug(
                        'untrained kb %s is ready to train in mysql' % kb_id)
                    if mymysql.myupdate(self._Conn, param):
                        logger.info(
                            'untrained kb %s in mysql turns from status 3 to 1.'
                            % kb_id)
                        mymysql.myupdatekg(self._Conn, paramkg)
                        if self.trainkb(temp_str):
                            param = (2, kb_id, kb_tag, 1)
                            if mymysql.myupdate(self._Conn, param):
                                logger.info(
                                    'success: trained kb %s in mysql turns from status 1 to 2.'
                                    % kb_id)
                                mymysql.myupdatekg(self._Conn, param)
                        else:
                            param = (0, kb_id, kb_tag, 1)
                            if mymysql.myupdate(self._Conn, param):
                                logger.info(
                                    'failure: untrained kb %s in mysql turns from status 1 to 0.'
                                    % kb_id)
                                mymysql.myupdatekg(self._Conn, param)
                    # Mark this box idle again once the task was handled.
                    self.deleteZKnodedata()
                mymysql.myclose(self._Conn)
                self._Conn = None

    def addtraintaskMonitor(self):
        """Poll this box's node once and run the task found there, if any.

        When the node does not exist yet it is created in the idle state.
        """
        # Monitor node data changes.
        if self._ZK.exists(self._Path):
            data, _ = self._ZK.get(self._Path)
            logger.debug('Monitor one A Box node with data: %s, path: %s' %
                         (data.decode("utf-8"), self._Path))
            if data:
                temp_str = self._decode_node(data)['Target']
                if self._Conn is None:
                    self._Conn = mymysql.myconnect(KBDATABASES)
                kb_id = temp_str.split("_")[0]
                kb_tag = temp_str.split("_")[-1]
                param = (kb_id, kb_tag, 0)
                temp_status = mymysql.myselectstatus(self._Conn, param)
                if operator.ne(temp_str, 'Null') and temp_status[0] == 3:
                    logger.debug('Watch one node %s with data %s is not Null' %
                                 (self._Path, temp_str))
                    param = (1, kb_id, kb_tag, 3)
                    paramkg = (1, kb_id, kb_tag, 0)
                    logger.info(
                        'untrained kb %s is ready to train in mysql' % kb_id)
                    if mymysql.myupdate(self._Conn, param):
                        logger.info(
                            'untrained kb %s in mysql turns from status 3 to 1.'
                            % kb_id)
                        mymysql.myupdatekg(self._Conn, paramkg)
                        if self.trainkb(temp_str):
                            param = (2, kb_id, kb_tag, 1)
                            if mymysql.myupdate(self._Conn, param):
                                logger.info('success: from status 1 to 2.')
                                mymysql.myupdatekg(self._Conn, param)
                        else:
                            param = (0, kb_id, kb_tag, 1)
                            if mymysql.myupdate(self._Conn, param):
                                logger.info('failure: from status 1 to 0.')
                                mymysql.myupdatekg(self._Conn, param)
                    self.deleteZKnodedata()
                mymysql.myclose(self._Conn)
                self._Conn = None
        else:
            self.InitialABOXNode()

    # ------------------------------------------------------------------
    # "MinA" master election and task assignment
    # ------------------------------------------------------------------
    def addMinAMasterMonitor(self):
        """Ensure the master node exists with empty data and watch it.

        Whenever the watch observes ``None`` data the current master is
        considered gone and a new election is attempted.
        """
        # self._MonitorRunning = True
        # Subscribe to "master node data removed".
        if self._ZK.exists(self._Master_path) is None:
            self._ZK.create(self._Master_path,
                            None,
                            None,
                            ephemeral=False,
                            sequence=False,
                            makepath=True)
        else:
            self._ZK.set(self._Master_path, None)

        @self._ZK.DataWatch(self._Master_path)
        def watch_node_data_removed(data, stat, path):
            if data is None:
                # Empty data signals that the master was removed.
                self._Current_mina_master = ''
                self.searchIdleAwithId()

    def searchIdleAwithId(self):
        """Elect the idle box with the smallest id as master.

        If that box is this one and mastership can be taken, start assigning
        training tasks.
        """
        # List all sibling boxes of this type on this VM.
        children = self._ZK.get_children("/%s/%s" % (self._Type, self._VName))
        if children is not None:
            temp_mina = sys.maxsize
            logger.debug("search idle A box %d with names %s" %
                         (len(children), children))
            random.shuffle(children)
            for child in children:
                child_path = "/%s/%s/%s" % (self._Type, self._VName, child)
                data, stat = self._ZK.get(child_path)
                if self._decode_node(data)['Target'] == 'Null':
                    temp = child.split("A")[-1]
                    if temp_mina > int(temp):
                        temp_mina = int(temp)
            logger.debug("mina=%d, id = %d" % (temp_mina, self._ID))
            if temp_mina == self._ID:
                if self.takeMinAMaster():
                    logger.info("MinA = %d, Node = %s" %
                                (temp_mina, self._Name))
                    self.processTrainTaskAssign()

    def searchIdleAboxassign(self):
        """Collect all idle sibling boxes and hand them to ``processtraintask``."""
        box_list = self._ZK.get_children("/%s/%s" % (self._Type, self._VName))
        idle_box = []
        random.shuffle(box_list)
        for box in box_list:
            node = "/%s/%s/%s" % (self._Type, self._VName, box)
            data, _ = self._ZK.get(node)
            if self._decode_node(data)['Target'] == 'Null':
                idle_box.append(box)
                logger.debug('idle A box is %s.' % box)
        logger.info(idle_box)
        if idle_box:
            self.processtraintask(idle_box)

    def processtraintask(self, box):
        """Assign pending MySQL tasks to the idle boxes in ``box``.

        Loops (sleeping 10 s between polls) until every box in the list has
        received a task, then returns ``True``.  ``box`` is consumed in place.
        """
        while True:
            # NOTE(review): startMonitorMySQL opens a new connection on every
            # poll without closing the previous one.
            datas = self.startMonitorMySQL()
            if len(datas):
                for aboxdata in datas:
                    oneabox = box[0]
                    kb_id = aboxdata.split("_")[0]
                    kb_tag = aboxdata.split("_")[-1]
                    param = (3, kb_id, kb_tag, 0)
                    logger.debug(
                        'untrained kb %s is ready for assigned in a A idle box called %s'
                        % (kb_id, oneabox))
                    if mymysql.myupdate(self._Conn, param):
                        label = self.assignOneTrainTasktoABox(
                            oneabox, aboxdata)
                        if not label:
                            # Assignment failed: roll the status back.
                            param = (0, kb_id, kb_tag, 3)
                            mymysql.myupdate(self._Conn, param)
                            logger.info(
                                'failure for update training mission %s. so from status 3 to 0.'
                                % kb_id)
                        else:
                            box.pop(0)
                            logger.info(
                                'success for update training mission %s from status 0 to 3.'
                                % kb_id)
                    if not box:
                        mymysql.myclose(self._Conn)
                        self._Conn = None
                        logger.debug('delete ZK node %s' % self._Master_path)
                        return True
            time.sleep(10)

    def takeMinAMaster(self):
        """Try to become the "MinA" master for this VM.

        Only boxes of type ``'A'`` take part; for any other type nothing is
        done and ``None`` is returned.

        :return: ``True`` when this box is now the master (the master node was
            created, or it already existed with empty data); ``False`` when
            the node already names another master, in which case that name is
            stored in ``_Current_mina_master``.
        :raises: any ZooKeeper error other than "node exists" raised by the
            create call.

        The previous implementation claimed mastership inside a ``finally``
        block that ended in ``return True``.  A ``return`` in ``finally``
        overrides the ``return False`` of the ``except`` branch and also
        discards any in-flight exception, so every caller became master.
        """
        if self._Type == 'A':
            address = ('{"Name":"%s"}' % self._Name).encode('utf-8')
            try:
                self._ZK.create(self._Master_path,
                                address,
                                None,
                                ephemeral=False,
                                sequence=False,
                                makepath=True)
            except self._node_exists_error():
                data, stat = self._ZK.get(self._Master_path)
                if data:
                    self._Current_mina_master = self._decode_node(data)['Name']
                    logger.debug('current mina is %s' %
                                 self._Current_mina_master)
                    return False
                # The node exists but is empty (addMinAMasterMonitor creates
                # it that way): treat it as vacant and fall through.
            self._Current_mina_master = self._Name
            logger.debug('current mina is itself i.e %s.' % self._Name)
            return True

    def processTrainTaskAssign(self):
        """As master, hand pending tasks to idle boxes.

        When no other box is idle the task is assigned to this box itself; on
        success the master node is deleted and ``True`` is returned.
        Otherwise the loop keeps polling every 10 s.
        """
        while True:
            datas = self.startMonitorMySQL()
            if len(datas):
                for aboxdata in datas:
                    kb_id = aboxdata.split("_")[0]
                    kb_tag = aboxdata.split("_")[-1]
                    oneabox = self.findOneIdleABox()
                    if oneabox == '':
                        param = (3, kb_id, kb_tag, 0)
                        logger.info(
                            'untrained kb %s is ready for assigned in min A box called %s'
                            % (kb_id, self._Name))
                        if mymysql.myupdate(self._Conn, param):
                            logger.info(
                                'success for update training mission %s from status 0 to 3.'
                                % kb_id)
                            if self.assignOneTrainTasktoABox(
                                    self._Name, aboxdata):
                                mymysql.myclose(self._Conn)
                                self._Conn = None
                                self._ZK.delete(self._Master_path,
                                                recursive=True)
                                logger.debug('delete ZK node %s' %
                                             self._Master_path)
                                return True
                            else:
                                param = (0, kb_id, kb_tag, 3)
                                mymysql.myupdate(self._Conn, param)
                                logger.info(
                                    'failure for update training mission %s. so from status 3 to 0.'
                                    % kb_id)
                    else:
                        param = (3, kb_id, kb_tag, 0)
                        logger.info(
                            'untrained kb %s is ready for assigned in a A idle box called %s'
                            % (kb_id, oneabox))
                        if mymysql.myupdate(self._Conn, param):
                            logger.info(
                                'success for update training mission %s from status 0 to 3.'
                                % kb_id)
                            label = self.assignOneTrainTasktoABox(
                                oneabox, aboxdata)
                            if not label:
                                param = (0, kb_id, kb_tag, 3)
                                mymysql.myupdate(self._Conn, param)
                                logger.info(
                                    'failure for update training mission %s. so from status 3 to 0.'
                                    % kb_id)
            time.sleep(10)

    def findOneIdleABox(self):
        """Return the name of one idle sibling box other than this one, or ``''``."""
        oneidleabox = ''
        children = self._ZK.get_children("/%s/%s" % (self._Type, self._VName))
        logger.debug(
            "find one idle A box There are %s children with names %s" %
            (len(children), children))
        random.shuffle(children)
        for child in children:
            child_path = "/%s/%s/%s" % (self._Type, self._VName, child)
            data, stat = self._ZK.get(child_path)
            if self._decode_node(data)['Target'] == 'Null':
                if operator.ne(child, self._Name):
                    oneidleabox = child
                    logger.debug('one idle A box is %s.' % oneidleabox)
                    break
        return oneidleabox

    def assignOneTrainTasktoABox(self, oneabox, aboxdata):
        """Write task ``aboxdata`` into box ``oneabox`` if that box is idle.

        :return: ``True`` when the task was written, ``False`` when the box
            already carries a task.
        """
        oneabox_path = "/%s/%s/%s" % (self._Type, self._VName, oneabox)
        oneabox_address = ('{"Target":"%s"}' % aboxdata).encode('utf-8')
        data, stat = self._ZK.get(oneabox_path)
        if self._decode_node(data)['Target'] == 'Null':
            self._ZK.set(oneabox_path, oneabox_address)
            logger.info('assign kb %s to A box %s' % (aboxdata, oneabox))
            return True
        else:
            logger.error('Error:A box %s is busy and can not be assigned.' %
                         oneabox)
            return False

    # ------------------------------------------------------------------
    # Node initialisation
    # ------------------------------------------------------------------
    def InitialMinANode(self):
        """Delete the master node (recursively) if it exists."""
        if self._ZK.exists(self._Master_path):
            self._ZK.delete(self._Master_path, recursive=True)
            logger.info('ZK node %s is deleted.' % self._Master_path)

    def InitialABOXNode(self):
        """(Re)create this box's node in the idle state."""
        address = '{"Target":"Null"}'.encode('utf-8')
        if self._ZK.exists(self._Path):
            self._ZK.delete(self._Path, recursive=True)
        self._ZK.create(self._Path,
                        address,
                        None,
                        ephemeral=False,
                        sequence=False,
                        makepath=True)
        logger.info('create a A box node: %s, data: %s' %
                    (self._Path, address.decode("utf-8")))

    def InitialBBOXNode(self):
        """(Re)create this box's B-type node and the empty ``/<vm>/<Bk>`` node."""
        address = (
            '{"Target":"Null","Add":"%s:%s/%s","status":"0","update_time":"%f"}'
            % (self._IP, self._Port, self._ID, time.time()))
        address = address.encode('utf-8')
        if self._ZK.exists(self._Path):
            self._ZK.delete(self._Path, recursive=True)
        self._ZK.create(self._Path,
                        address,
                        None,
                        ephemeral=False,
                        sequence=False,
                        makepath=True)
        logger.info('create a B box node: %s, data: %s' %
                    (self._Path, address.decode("utf-8")))
        vmknode = "/%s/%s" % (HOST['name'], CBOX['Bk'])
        if self._ZK.exists(vmknode):
            self._ZK.delete(vmknode, recursive=True)
        self._ZK.create(vmknode,
                        None,
                        None,
                        ephemeral=False,
                        sequence=False,
                        makepath=True)
        logger.info('create a VM/k node: %s, not data. ' % vmknode)

    # ------------------------------------------------------------------
    # MySQL polling and B-box bookkeeping
    # ------------------------------------------------------------------
    def startMonitorMySQL(self):
        """Open a MySQL connection and list pending tasks as ``"<a>_<b>"`` strings.

        Rows whose second column is not all digits are skipped.  The
        connection is left open in ``self._Conn`` for the caller.
        """
        self._Conn = mymysql.myconnect(KBDATABASES)
        train_tasks = []
        param = (0, 0)
        selectresult = mymysql.myselect(self._Conn, param)
        for row in selectresult:
            if str(row[1]).isdigit():
                train_tasks.append("%s_%s" % (row[0], row[1]))
        logger.info('current train tasks is %s from mysql.' % train_tasks)
        return train_tasks

    def updateselfZKBBox(self, status):
        """Rewrite this B box's node with a new ``status`` and timestamp."""
        oneabox_path = self._Path
        data, _ = self._ZK.get(oneabox_path)
        current = self._decode_node(data)
        oneabox_address = (
            '{"Target":"%s","Add":"%s","status":"%s","update_time":"%f"}' %
            (current['Target'], current['Add'], str(status), time.time()))
        oneabox_address = oneabox_address.encode('utf-8')
        self._ZK.set(oneabox_path, oneabox_address)
        logger.info('success update B Box node %s with data %s.' %
                    (oneabox_path, oneabox_address))

    def updateselfZKBBoxTarget(self, target, status):
        """Rewrite this B box's node with a new ``target``, ``status`` and timestamp."""
        oneabox_path = self._Path
        data, _ = self._ZK.get(oneabox_path)
        oneabox_address = (
            '{"Target":"%s","Add":"%s","status":"%s","update_time":"%f"}' %
            (target, self._decode_node(data)['Add'], str(status),
             time.time()))
        oneabox_address = oneabox_address.encode('utf-8')
        self._ZK.set(oneabox_path, oneabox_address)
        logger.info('success update B')

    def stop(self):
        """Clear the running flags and stop the ZooKeeper client."""
        self._MonitorRunning = False
        self._MinARunning = False
        self.stopZK()

    # ------------------------------------------------------------------
    # Chatterbot training
    # ------------------------------------------------------------------
    def initcbot(self, kbname, onlyread=False):
        """Create the ChatBot instance for knowledge base ``kbname``.

        Failures are logged, not raised; ``self._Chatbot`` then keeps its
        previous value.
        """
        try:
            self._Chatbot = ChatBot(
                self._Name,
                storage_adapter=CHATTERBOT['storage_adapter'],
                filters=['chatterbot.filters.RepetitiveResponseFilter'],
                database_uri=KGDATABASES['database_uri'],
                database='ai_%s' % kbname,
                read_only=onlyread,
            )
            # logger.info(self._Chatbot)
        except Exception as msg:
            logger.info('Failure to initialize Chatterbot.', exc_info=True)
            logger.error(msg)

    def preprocess(self, sentence, companyid=None):
        """Return ``sentence`` after synonym substitution when both the
        ``ISFENCI`` and ``ISSYMS`` switches are on, else unchanged."""
        if ISFENCI:
            if ISSYMS:
                return fenci.symp_sentence(sentence, companyid)
            else:
                return sentence
        else:
            return sentence

    def trainkb(self, kbname):
        """Train the chatbot on every question/answer row of ``kbname``.

        :return: ``True`` when at least as many rows were trained as were
            selected, ``False`` otherwise.
        """
        self.initcbot(kbname)
        try:
            logger.info("start set trainer")
            self._Chatbot.set_trainer(ListTrainer)
        except Exception as msg:
            logger.error(msg)
            logger.info("start set trainer")
        a = 0
        # NOTE(review): the parentheses do not make this a tuple; the helper
        # receives a plain string.
        param = (kbname.split("_")[0])
        selectresult = mymysql.myselectqas(self._Conn, param)
        company_id = mymysql.myselectcpid(self._Conn, param)
        logger.debug('start training the knowdata: %s and the companyid: %s.' %
                     (kbname, company_id[0]))
        b = len(selectresult)
        for row in selectresult:
            answer = "%s@%s" % (row[2], row[0])
            question = self.preprocess(row[1], company_id[0])
            self._Chatbot.train([question, answer])
            logger.debug('Train: %d, %s --> %s.' % (a, question, answer))
            a = a + 1
        if a >= b:
            logger.info('success training.')
            return True
        else:
            logger.info('failure training.')
            return False

    def deleteZKnodedata(self):
        """Reset this box's node to the idle payload."""
        # oneabox_path = "/%s/%s/%s" % (self._Type, self._VName, self._Name)
        oneabox_path = self._Path
        oneabox_address = '{"Target":"Null"}'.encode('utf-8')
        self._ZK.set(oneabox_path, oneabox_address)
        logger.info('A box %s turns busy into idle.' % self._Path)

    def startBZKmonitor(self):
        """Reset B boxes whose task is older than ``TIMERHOURS``.

        For each timed-out box the node is set back to idle/status 0 and the
        matching ``/<vm>/<Bk>/<kb>/<box>`` node is deleted when present.
        """
        children = self._ZK.get_children("/%s/%s" % (self._Type, self._VName))
        for child in children:
            child_path = "/%s/%s/%s" % (self._Type, self._VName, child)
            data, stat = self._ZK.get(child_path)
            node = self._decode_node(data)
            kbid = node['Target']
            if operator.ne(kbid, 'Null'):
                if time.time() > float(node['update_time']) + TIMERHOURS:
                    onebbox_address = (
                        '{"Target":"Null","Add":"%s","status":"0","update_time":"%f"}'
                        % (node['Add'], time.time()))
                    onebbox_address = onebbox_address.encode('utf-8')
                    self._ZK.set(child_path, onebbox_address)
                    logger.info(
                        'set B Box node %s is null and status 0 because of timeout a half hour.'
                        % child_path)
                    tmp_node = "/%s/%s/%s/%s" % (self._VName, CBOX['Bk'],
                                                 kbid, child)
                    if self._ZK.exists(tmp_node):
                        transaction = self._ZK.transaction()
                        transaction.delete(tmp_node)
                        transaction.commit()
                        logger.info('delete a VM/k/kb/Box node %s.' %
                                    tmp_node)
# Register a watch on the children of /xy/test.  `zk` is defined earlier in
# the script (presumably a started KazooClient -- confirm).
@zk.ChildrenWatch("/xy/test")
def watch_children(children):
    """Children-watch callback for /xy/test; currently a deliberate no-op."""
    #print("watch_children of /xy/test, Children are now: %s" % str(children))
    #print("watch_children of /xy/test, Children count: %d" % len(children))
    pass


# The function above is called immediately, and from then on whenever the
# watch fires.
@zk.DataWatch("/xy/test")
def watch_node(data, stat):
    """Data-watch callback for /xy/test; currently a deliberate no-op."""
    #print("watch_node, Version: %s, data: %s" % (stat.version, data.decode("utf-8")))
    pass


# Transaction demo: queue a version check and a create for the same node,
# commit both as one unit and print whatever commit() returns.
# NOTE(review): the check targets version 3 of /xy/test/node2 while the create
# targets the same path -- presumably this demonstrates a failing
# transaction; confirm the intent.
transaction = zk.transaction()
transaction.check('/xy/test/node2', version=3)
transaction.create('/xy/test/node2', b"a value")
result = transaction.commit()
print("transaction result %s" % str(result))
print("----------------------------")

# Disabled experiment: create 99 ephemeral sequential nodes.
# for i in range(1,100):
#     try:
#         result = zk.create("/xy/test/node", b"a value", acl=None, sequence=True, ephemeral=True)
#     except Exception, e:
#         print('=========== exception when create node, %s' % e)
#     else:
#         #print('=========== create /xy/test/node reuslt=%s' % result )
#         pass
class Zookeeper:
    """Convenience wrapper around a kazoo client.

    Besides plain node get/set/create/delete helpers it can claim a free
    ``process*`` slot under a parent node (``get_node``) and advance the
    sequence numbers kept in a "filename pool" (``zk_get_merge_fn``).
    Several methods terminate the process with ``sys.exit()`` on fatal
    conditions.
    """

    def __init__(self, hosts, max_merge_seq):
        """Remember the host string and the largest allowed merge sequence."""
        print('create a zookeeper object')
        self.Hosts = hosts
        self.MAX_MERGE_FILE_SEQUENCE = max_merge_seq
        self.zk = ""
        self.IsConn = False
        self.filename = ''
        self.pattern = ''
        self.process_path = ''

    def connect(self):
        """Create and start a KazooClient; exit the process on failure.

        :return: the started client (also stored in ``self.zk``)
        """
        print('try connect to zookeeper')
        client = KazooClient(self.Hosts)
        self.zk = client
        try:
            client.start()
        except Exception as e:
            print("connect zookeeper failed, err:%s" % e)
            sys.exit()
        self.IsConn = True
        print('connect zookeeper success')
        return client

    def get_node(self, node_path):
        """Claim an idle process id under ``node_path``.

        Children whose name starts with ``process`` are scanned in sorted
        order; the first one without a ``lock`` child gets an ephemeral
        ``lock`` node and its name is returned.  After three fruitless scans
        the process exits.

        :return: name of the claimed process node
        """
        self.connect()
        self.process_path = node_path
        if not self.zk.exists(node_path):
            logging.error('zookeeper process node path: %s not exist' %
                          node_path)
            sys.exit()

        starts_with_process = re.compile(r"^process")
        candidates = sorted(
            name for name in self.zk.get_children(node_path)
            if re.findall(starts_with_process, name))
        if not candidates:
            print("no process id in zookeeper process path")
            sys.exit()

        scans = 0
        while True:
            for candidate in candidates:
                candidate_path = '%s/%s' % (node_path, candidate)
                if 'lock' in self.zk.get_children(candidate_path):
                    # Somebody else already holds this slot.
                    continue
                self.zk.create("%s/%s" % (candidate_path, 'lock'),
                               ephemeral=True)
                print('get process_id :%s from zookeeper ' % candidate)
                return candidate
            scans += 1
            print("no free process id in zookeeper")
            if scans >= 3:
                print(
                    "get process id faild three times, please check zookeeper process id, exit"
                )
                sys.exit()

    def lock(self, lock):
        """Create the ephemeral node ``lock``.

        :param lock: full path of the lock node
        """
        self.zk.create(lock, ephemeral=True)

    def check_exists(self, node_path):
        """Return whatever the client's ``exists`` reports for ``node_path``."""
        return self.zk.exists(node_path)

    def get_config(self, config_path, config_node):
        """Dump the data of ``config_node`` into ``<config_path>config.ini``.

        :param config_path: prefix the file name is appended to as-is
        :param config_node: node holding the configuration text
        """
        payload, _stat = self.zk.get(config_node)
        with open(config_path + "config.ini", 'w') as out:
            out.write(payload.decode())

    def get_node_value(self, zk_node):
        """Read a node.

        :param zk_node: node path
        :return: ``(data, stat)`` -- the node's value and its status record
        """
        value, status = self.zk.get(zk_node)
        return value, status

    def set_node_value(self, zk_node, data):
        """Set the value of a node and return the client's result.

        :param zk_node: node path
        :param data: new value
        """
        return self.zk.set(zk_node, value=data)

    def delete_node(self, zk_node):
        """Delete a single node.

        :param zk_node: node path
        """
        self.zk.delete(zk_node)

    def create_node(self, node, flag=False):
        """Create a node, reporting failure instead of raising.

        :param node: node path
        :param flag: whether the node is ephemeral
        :return: ``True`` on success, ``False`` if creation raised
        """
        try:
            self.zk.create(node, ephemeral=flag)
        except Exception as e:
            logging.info("create zookeeper node:%s failed, err:%s" % (node, e))
            print(node, e)
            return False
        else:
            return True

    def cp(self, src, dest):
        """Copy a local file (at most 1 MiB) into a new ZooKeeper node.

        Exits the process when the file is missing or too large, or when the
        destination node already exists.

        :param src: local file
        :param dest: zookeeper node
        """
        if not os.path.isfile(src):
            print("%s: `%s': Local file does not exist" % ('cp', src))
            sys.exit()
        if os.path.getsize(src) > 1048576:
            print("%s: `%s': Local file maximum limit of 1M" % ('cp', src))
            sys.exit()
        self.connect()
        if self.zk.exists(dest):
            print("%s: `%s': Zookeeper exists" % ('cp', dest))
            sys.exit()
        with open(src, 'rb') as handle:
            content = handle.read()
        self.zk.create(dest)
        self.zk.set(dest, value=content)

    def zk_get_merge_fn(self, process_path, work_node, cur_seq,
                        filename_pool):
        """Take the next sequence entry from ``filename_pool`` and record redo info.

        Pool entries are named ``<yyyymmdd>.<seq>.<prov>``.  For each entry
        (sorted) the successor name is computed -- rolling over to sequence 0
        of the next day past ``MAX_MERGE_FILE_SEQUENCE`` -- and swapped in
        with a delete+create transaction so the change is atomic.  Before
        committing, the redo node ``<process_path>/<work_node>/redo`` is
        created and filled.

        :return: ``0`` when the pool is ahead of ``cur_seq`` (not yet time to
            merge), ``1`` when no entry could be taken, otherwise the name of
            the newly created pool entry
        """
        if not self.zk.exists(filename_pool):
            logging.error('no filename_pool in zookeeper')
            sys.exit()
        entries = self.zk.get_children(filename_pool)
        if not entries:
            logging.error('the zookeeper filename_pool is empty')
            sys.exit()

        redo_records = []
        for entry in sorted(entries):
            day, seq_text, prov = entry.split('.')
            digits = re.sub("[A-Za-z.]", "", "%s%s" % (day, seq_text))
            if int(digits) > int(cur_seq):
                logging.info('zk_seq:%s > cur_seq:%s, wait...' %
                             (digits, cur_seq))
                return 0

            next_seq = int(seq_text) + 1
            if next_seq > self.MAX_MERGE_FILE_SEQUENCE:
                next_seq = 0
                tomorrow = (datetime.datetime.strptime(day, '%Y%m%d') +
                            datetime.timedelta(days=1))
                day = '%s%02d%02d' % (tomorrow.year, tomorrow.month,
                                      tomorrow.day)
            seq_text = "%03d" % next_seq
            successor = '%s.%s.%s' % (day, seq_text, prov)
            successor_path = "%s/%s" % (filename_pool, successor)

            # One transaction removes the old number and adds the new one.
            swap = self.zk.transaction()
            swap.delete("%s/%s" % (filename_pool, entry))
            swap.create(successor_path)

            redo_records.append("filenamepool:" +
                                ",".join([day, seq_text, prov]))
            redo_node = process_path + "/" + work_node + "/" + "redo"
            self.create_node(redo_node)
            self.set_node_value(redo_node,
                                ";".join(redo_records).encode("utf-8"))

            outcome = swap.commit()
            if outcome[0] is True and outcome[1] == successor_path:
                return successor
        return 1
class ZKHandler(object):
    """Schema-aware facade over a KazooClient connection.

    Keys are translated to ZooKeeper paths through a ZKSchema instance (see
    ``get_schema_path``); reads, transactional writes and renames, deletes
    and lock construction are then performed on the resulting paths.
    """

    def __init__(self, config, logger=None):
        """
        Build a handler from config

        The KazooClient is constructed from config["coordinators"] but not
        started, and a fresh ZKSchema is attached.
        """
        self.encoding = "utf8"
        self.coordinators = config["coordinators"]
        self.logger = logger
        self.zk_conn = KazooClient(hosts=self.coordinators)
        self._schema = ZKSchema()

    #
    # Class meta-functions
    #
    def coordinators(self):
        # On instances this method is hidden by the attribute of the same
        # name assigned in __init__.
        return str(self.coordinators)

    def log(self, message, state=""):
        """
        Send {message} to the configured logger, or print it if there is none
        """
        if self.logger is None:
            print(message)
            return
        self.logger.out(message, state)

    #
    # Properties
    #
    @property
    def schema(self):
        return self._schema

    #
    # State/connection management
    #
    def listener(self, state):
        """
        Log KazooState transitions

        Nothing is acted upon here; the state is only reported.
        """
        if state != KazooState.CONNECTED:
            self.log(
                "Connection to Zookeeper lost with state {}".format(state),
                state="w")
            return
        self.log("Connection to Zookeeper resumed", state="o")

    def connect(self, persistent=False):
        """
        Start the connection; for persistent connections also log the start
        and register the state listener

        Any failure is re-raised as ZKConnectionException.
        """
        try:
            self.zk_conn.start()
            if persistent:
                self.log("Connection to Zookeeper started", state="o")
                self.zk_conn.add_listener(self.listener)
        except Exception as e:
            raise ZKConnectionException(self, e)

    def disconnect(self, persistent=False):
        """
        Stop and close the connection

        The handler itself stays usable and may be connected again.
        """
        self.zk_conn.stop()
        self.zk_conn.close()
        if persistent:
            self.log("Connection to Zookeeper terminated", state="o")

    #
    # Schema helper actions
    #
    def get_schema_path(self, key):
        """
        Translate {key} into a Zookeeper path using the current schema.

        * 2-tuple (ipath, item): path of that item.
        * 4-tuple (ipath, item, sub_ipath, sub_item): item path followed by
          the sub-item path.
        * str starting with '/': returned unchanged (raw path, used by
          backup/restore; deprecated migration behaviour).
        * any other str: looked up as a lone schema path.
        * anything else, including tuples of other lengths: None.
        """
        if isinstance(key, tuple):
            size = len(key)
            if size == 4:
                ipath, item, sub_ipath, sub_item = key
                parent = self.schema.path(ipath, item=item)
                return parent + self.schema.path(sub_ipath, item=sub_item)
            if size != 2:
                # Invalid key
                return None
            ipath, item = key
        elif isinstance(key, str):
            if re.match(r"^/", key):
                # Raw key path
                return key
            ipath, item = key, None
        else:
            # Invalid key
            return None

        return self.schema.path(ipath, item=item)

    #
    # Key Actions
    #
    def exists(self, key):
        """
        Report whether a key exists (False for keys with no valid path)
        """
        path = self.get_schema_path(key)
        if path is None:
            # Likely a missing schema entry
            return False
        return bool(self.zk_conn.exists(path))

    def read(self, key):
        """
        Return the decoded data of a key, or None if the key is invalid or
        absent
        """
        try:
            path = self.get_schema_path(key)
            if path is None:
                # Likely a missing schema entry
                return None
            node = self.zk_conn.get(path)
            return node[0].decode(self.encoding)
        except NoNodeError:
            return None

    def write(self, kvpairs):
        """
        Create or update a list of (key, value) tuples in one transaction

        Returns True when the transaction commits, False on malformed input
        or any failure. Pairs whose key has no valid path are skipped.
        """
        if type(kvpairs) is not list:
            self.log("ZKHandler error: Key-value sequence is not a list",
                     state="e")
            return False

        txn = self.zk_conn.transaction()

        for pair in kvpairs:
            if type(pair) is not tuple:
                self.log(
                    "ZKHandler error: Key-value pair '{}' is not a tuple".
                    format(pair),
                    state="e",
                )
                return False

            key, value = pair[0], pair[1]
            path = self.get_schema_path(key)
            if path is None:
                # Likely a missing schema entry
                continue

            if self.exists(key):
                # Existing key: update it, then verify the version that the
                # update is expected to produce
                current = self.zk_conn.get(path)
                expected_version = current[1].version + 1
                txn.set_data(path, str(value).encode(self.encoding))
                try:
                    txn.check(path, expected_version)
                except TypeError:
                    self.log(
                        "ZKHandler error: Key '{}' does not match expected version"
                        .format(path),
                        state="e",
                    )
                    return False
            else:
                # New key
                txn.create(path, str(value).encode(self.encoding))

        try:
            txn.commit()
        except Exception as e:
            self.log(
                "ZKHandler error: Failed to commit transaction: {}".format(e),
                state="e")
            return False
        return True

    def delete(self, keys, recursive=True):
        """
        Delete one key or a list of keys (recursively by default)

        Keys that do not exist are ignored; the first failing delete is
        logged and makes the call return False.
        """
        targets = keys if type(keys) is list else [keys]

        for key in targets:
            if not self.exists(key):
                continue
            try:
                path = self.get_schema_path(key)
                self.zk_conn.delete(path, recursive=recursive)
            except Exception as e:
                self.log(
                    "ZKHandler error: Failed to delete key {}: {}".format(
                        path, e),
                    state="e",
                )
                return False

        return True

    def children(self, key):
        """
        Return the child names of a key, or None if the key is invalid or
        absent
        """
        try:
            path = self.get_schema_path(key)
            if path is None:
                # Likely a missing schema entry
                return None
            return self.zk_conn.get_children(path)
        except NoNodeError:
            return None

    def rename(self, kkpairs):
        """
        Rename keys given as a list of (source, destination) tuples

        Each source subtree is copied to the destination and removed, all in
        a single transaction. Returns True when it commits.
        """
        if type(kkpairs) is not list:
            self.log("ZKHandler error: Key-key sequence is not a list",
                     state="e")
            return False

        txn = self.zk_conn.transaction()

        def move_subtree(txn, old_path, new_path):
            # Copy the node, recurse into its children, then drop the source
            payload = self.zk_conn.get(old_path)[0]
            txn.create(new_path, payload)

            if self.children(old_path):
                for name in self.children(old_path):
                    move_subtree(txn, "{}/{}".format(old_path, name),
                                 "{}/{}".format(new_path, name)
                                 )

            txn.delete(old_path)

        for pair in kkpairs:
            if type(pair) is not tuple:
                self.log(
                    "ZKHandler error: Key-key pair '{}' is not a tuple".format(
                        pair),
                    state="e",
                )
                return False

            old_key = pair[0]
            old_path = self.get_schema_path(old_key)
            if old_path is None:
                # Likely a missing schema entry
                continue

            new_key = pair[1]
            new_path = self.get_schema_path(new_key)
            if new_path is None:
                # Likely a missing schema entry
                continue

            if not self.exists(old_key):
                self.log(
                    "ZKHander error: Source key '{}' does not exist".format(
                        old_path),
                    state="e",
                )
                return False

            if self.exists(new_key):
                self.log(
                    "ZKHander error: Destination key '{}' already exists".
                    format(new_path),
                    state="e",
                )
                return False

            move_subtree(txn, old_path, new_path)

        try:
            txn.commit()
        except Exception as e:
            self.log(
                "ZKHandler error: Failed to commit transaction: {}".format(e),
                state="e")
            return False
        return True

    #
    # Lock actions
    #
    def _build_lock(self, key, factory_name, missing_message, failed_message):
        """
        Shared retry loop behind readlock/writelock/exclusivelock

        Calls the connection's {factory_name} recipe with the key's path and a
        fresh UUID. A NoNodeError logs {missing_message} and yields None; any
        other error is retried every 0.5s, and once the attempt counter
        exceeds 5 {failed_message} is logged and None is returned.
        """
        path = self.get_schema_path(key)

        for attempt in range(1, 7):
            try:
                lock_id = str(uuid.uuid1())
                return getattr(self.zk_conn, factory_name)(path, lock_id)
            except NoNodeError:
                self.log(missing_message.format(path), state="e")
                return None
            except Exception as e:
                if attempt > 5:
                    self.log(failed_message.format(e), state="e")
                    return None
                time.sleep(0.5)

        return None

    def readlock(self, key):
        """
        Build a read lock object for a key (None on failure)
        """
        return self._build_lock(
            key,
            "ReadLock",
            "ZKHandler warning: Failed to acquire read lock on nonexistent path {}",
            "ZKHandler warning: Failed to acquire read lock after 5 tries: {}",
        )

    def writelock(self, key):
        """
        Build a write lock object for a key (None on failure)
        """
        return self._build_lock(
            key,
            "WriteLock",
            "ZKHandler warning: Failed to acquire write lock on nonexistent path {}",
            "ZKHandler warning: Failed to acquire write lock after 5 tries: {}",
        )

    def exclusivelock(self, key):
        """
        Build an exclusive lock object for a key (None on failure)
        """
        return self._build_lock(
            key,
            "Lock",
            "ZKHandler warning: Failed to acquire exclusive lock on nonexistent path {}",
            "ZKHandler warning: Failed to acquire exclusive lock after 5 tries: {}",
        )