Beispiel #1
0
class ZK:
    """Thin convenience wrapper around a started ``KazooClient``.

    The wrapper owns the client: it is started in ``__init__`` and stopped
    when the wrapper is garbage-collected.
    """

    # Class-level default so ``__del__`` can run safely even when
    # ``__init__`` raised before the instance attribute was assigned.
    client = None

    def __init__(self, zk_host):
        """Create a client for *zk_host* and start it."""
        self.client = KazooClient(zk_host)
        self.client.start()

    def __del__(self):
        """Stop the client, if one was ever created."""
        if self.client is not None:
            self.client.stop()

    def get_node(self, path):
        """Return a ``ZKNode`` for *path*, or ``None`` if it does not exist."""
        if not self.client.exists(path):
            return None
        return ZKNode(path, self)

    def create_node(self, path):
        """Ensure *path* exists and return the result of ``get_node(path)``."""
        self.client.ensure_path(path)
        return self.get_node(path)

    def get_transaction(self):
        """Return a new transaction object from the underlying client."""
        return self.client.transaction()

    def get_lock(self, path, id=None):
        """Return a lock object for ``<path>/lock``.

        *id* is passed through as the lock identifier.
        """
        return self.client.Lock(path + "/lock", id)

    def has_lock(self, path):
        """Return ``True`` when ``<path>/lock`` exists and has children."""
        lock_path = path + "/lock"
        if not self.client.exists(lock_path):
            return False
        return len(self.client.get_children(lock_path)) > 0
Beispiel #2
0
class Callback:
    """Deliver task callbacks recorded in ZooKeeper via HTTP POST.

    Task ids are read from the children of ``/<root>/callback``. For each
    id the task stored under ``/<root>/tasks/<id>`` is loaded, posted to the
    URL in its ``callback`` field and, on success, removed from ZooKeeper.
    """

    def __init__(self, zk_hosts, zk_root):
        """Prepare (but do not start) the ZooKeeper client.

        Args:
            zk_hosts: Connection string passed to ``KazooClient``.
            zk_root: Name of the root node, used without a leading slash
                when paths are built.
        """
        self.zk = KazooClient(zk_hosts)
        self.root = zk_root
        # Set by shutdown(); stops both the watch and the polling loop.
        self.event = threading.Event()
        self.tasks = {}

    def get_task(self, task_id):
        """Load a task and the status of each of its targets.

        Returns:
            The decoded JSON task with an extra ``targets`` key mapping each
            target name to its decoded status string.
        """
        node = '/{}/tasks/{}'.format(self.root, task_id)
        data, _ = self.zk.get(node)
        task = json.loads(data.decode())

        # The targets live below the task node itself.
        targets_node = '{}/targets'.format(node)
        targets = {}
        for target in self.zk.get_children(targets_node):
            status, _ = self.zk.get('{}/{}'.format(targets_node, target))
            targets[target] = status.decode()
        task['targets'] = targets
        return task

    def delete(self, task_id):
        """Delete the callback marker and the task node in one transaction."""
        callback_node = '/{}/callback/{}'.format(self.root, task_id)
        task_node = '/{}/tasks/{}'.format(self.root, task_id)
        tx = self.zk.transaction()
        tx.delete(callback_node)
        tx.delete(task_node)
        tx.commit()

    def run(self, task_id):
        """Post the task to its callback URL and delete it afterwards.

        Failures while posting or deleting are logged, not raised, so the
        task stays in ZooKeeper and is retried by ``compensate``.
        """
        task = self.get_task(task_id)
        try:
            requests.post(task['callback'], json=task)
            self.delete(task_id)
        except Exception as e:
            logging.error(e)

    def watch_tasks(self, tasks):
        """Children-watch callback: run every task id not seen before.

        Returns ``False`` once shutdown was requested.
        """
        for task_id in set(tasks).difference(self.tasks):
            self.run(task_id)
        self.tasks = tasks
        return not self.event.is_set()

    def watch(self):
        """Register ``watch_tasks`` on the children of ``/<root>/callback``."""
        ChildrenWatch(self.zk, '/{}/callback'.format(self.root),
                      self.watch_tasks)

    def compensate(self):
        """Re-run every pending callback every 10 seconds until shutdown."""
        while not self.event.is_set():
            for task in self.zk.get_children('/{}/callback'.format(self.root)):
                self.run(task)
            self.event.wait(10)

    def start(self):
        """Connect, install the watch and block in the polling loop."""
        self.zk.start()
        self.watch()
        self.compensate()

    def shutdown(self):
        """Stop the loops and release the ZooKeeper client."""
        self.event.set()
        # close() is meant for an already stopped client, so stop first.
        self.zk.stop()
        self.zk.close()
Beispiel #3
0
def main(hosts='127.0.0.1:2181'):
    """Walk through the basic Kazoo operations against a ZooKeeper server.

    Creates a small tree under ``/my``, registers a children watch and a
    data watch, reads, updates and deletes nodes, runs one transaction and
    finally removes the whole tree again.

    Args:
        hosts: ZooKeeper connection string. Defaults to the address the
            example originally hard-coded.
    """
    zk = KazooClient(hosts=hosts)
    zk.start()

    # Everything after start() runs under try/finally so the client is
    # stopped even when a step fails (e.g. a node left over from an
    # earlier, aborted run makes create() raise).
    try:
        # Registered after start(), so only later state changes are reported.
        @zk.add_listener
        def my_listener(state):
            if state == KazooState.LOST:
                print("LOST")
            elif state == KazooState.SUSPENDED:
                print("SUSPENDED")
            else:
                print("Connected")

        # Creating Nodes
        # Ensure a path, create if necessary
        zk.ensure_path("/my/favorite")
        # Create a node with data
        zk.create("/my/favorite/node", b"")
        zk.create("/my/favorite/node/a", b"A")
        # Reading Data
        # Determine if a node exists
        if zk.exists("/my/favorite"):
            print("/my/favorite is existed")

        @zk.ChildrenWatch("/my/favorite/node")
        def watch_children(children):
            print("Children are now: %s" % children)

        # Above function called immediately, and from then on
        @zk.DataWatch("/my/favorite/node")
        def watch_node(data, stat):
            # The watcher receives None values once the node no longer
            # exists (it is deleted at the end of this walkthrough).
            if stat is None:
                return
            print("Version: %s, data: %s" % (stat.version, data.decode("utf-8")))

        # Print the version of a node and its data
        data, stat = zk.get("/my/favorite/node")
        print("Version: %s, data: %s" % (stat.version, data.decode("utf-8")))
        # List the children
        children = zk.get_children("/my/favorite/node")
        print("There are %s children with names %s" % (len(children), children))
        # Updating Data
        zk.set("/my/favorite", b"some data")
        # Deleting Nodes
        zk.delete("/my/favorite/node/a")
        # Transactions
        transaction = zk.transaction()
        transaction.check('/my/favorite/node', version=-1)
        transaction.create('/my/favorite/node/b', b"B")
        results = transaction.commit()
        print("Transaction results is %s" % results)
        zk.delete("/my/favorite/node/b")
        zk.delete("/my", recursive=True)
        # Give the watch callbacks a moment to fire before disconnecting.
        time.sleep(2)
    finally:
        zk.stop()
Beispiel #4
0
class ZookeeperClient:
    """Small facade over ``KazooClient`` for node CRUD and transactions."""

    def __init__(self, configuration):
        """Connect to the server described by *configuration*.

        Args:
            configuration: Mapping with the keys ``host`` and ``port``.
        """
        self.host = configuration['host']
        self.port = configuration['port']

        self.client = KazooClient(hosts='{}:{}'.format(self.host, self.port))
        self.client.start()

    def create_node(self, path, data):
        """Create the node *path* holding *data*."""
        self.client.create(path, data)

    def get_node(self, path):
        """Return the data stored at *path* (the stat is discarded)."""
        data, _stat = self.client.get(path)
        return data

    def get_children(self, path):
        """Return the names of the children of *path*."""
        return self.client.get_children(path)

    def get_children_count(self, path):
        """Return the number of children of *path*, or -1 if it is missing."""
        try:
            return len(self.get_children(path))
        except NoNodeError:
            return -1

    @contextmanager
    def transaction(self):
        """Yield a transaction and commit it when the ``with`` body succeeds.

        The transaction is committed only if the body finishes without an
        exception, so a half-built transaction is never sent. A failed
        commit is logged; it is not raised.
        """
        transaction = self.client.transaction()
        yield transaction

        results = transaction.commit()
        # commit() reports a failed operation as an exception instance in
        # the result list, so that is what has to be looked for.
        if any(isinstance(result, Exception) for result in results):
            logging.error("Zookeeper transaction failed!")

    def delete_node(self, path):
        """Delete the node at *path*; errors propagate to the caller."""
        self.client.delete(path)

    def safe_delete_node(self, path):
        """Delete *path* and report success instead of raising.

        Returns:
            ``True`` if the node was deleted, ``False`` on a version
            conflict or when the node does not exist.
        """
        try:
            self.client.delete(path)
            return True
        except BadVersionError:
            logging.warning("Bad Version Error")
            return False
        except NoNodeError:
            logging.warning("No Node Error")
            return False

    def close(self):
        """Stop the underlying client."""
        self.client.stop()
Beispiel #5
0
def main(hosts='10.0.0.130:2182'):
    """Walk through the basic Kazoo operations against a ZooKeeper server.

    Creates a small tree under ``/my``, registers a children watch and a
    data watch, reads, updates and deletes nodes, runs one transaction and
    finally removes the whole tree again.

    Args:
        hosts: ZooKeeper connection string. Defaults to the address the
            example originally hard-coded.
    """
    zk = KazooClient(hosts=hosts)

    # Registered before start() so the initial connection is reported too.
    zk.add_listener(my_listener)

    zk.start()

    # Everything after start() runs under try/finally so the client is
    # stopped even when a step fails (e.g. a node left over from an
    # earlier, aborted run makes create() raise).
    try:
        zk.ensure_path("/my/favorite")

        zk.create("/my/favorite/node", b"")
        zk.create("/my/favorite/node/a", b"A")

        if zk.exists("/my/favorite"):
            print("/my/favorite is existed")

        @zk.ChildrenWatch("/my/favorite/node")
        def watch_children(children):
            print("Children are now: %s" % children)

        @zk.DataWatch("/my/favorite/node")
        def watch_node(data, stat):
            # The watcher receives None values once the node no longer
            # exists (it is deleted at the end of this walkthrough).
            if stat is None:
                return
            print("Version: %s, data: %s" % (stat.version, data.decode("utf-8")))

        data, stat = zk.get("/my/favorite/node")
        print("Version: %s, data: %s" % (stat.version, data.decode("utf-8")))

        # List the children
        children = zk.get_children("/my/favorite/node")
        print("There are %s children with names %s" % (len(children), children))

        # Updating Data
        zk.set("/my/favorite", b"some data")

        # Deleting Nodes
        zk.delete("/my/favorite/node/a")

        # Transactions
        transaction = zk.transaction()
        transaction.check('/my/favorite/node', version=-1)
        transaction.create('/my/favorite/node/b', b"B")

        results = transaction.commit()
        print("Transaction results is %s" % results)

        zk.delete("/my/favorite/node/b")
        zk.delete("/my", recursive=True)
        # Give the watch callbacks a moment to fire before disconnecting.
        time.sleep(2)
    finally:
        zk.stop()
Beispiel #6
0
    def run(self):
        """Consume from Kafka and mirror the latest offsets into ZooKeeper.

        For every configured topic/partition a node
        ``/consumers/my_consumer/offsets/<topic>/<partition>`` is created
        (initialised to ``b"0"``) if missing. Then, until the shutdown event
        is set, each poll's highest offset per partition is written to the
        matching node in a single transaction.
        """
        zk_path_topic_tmpl = '/consumers/my_consumer/offsets/'
        zk_path_partition_tmpl = zk_path_topic_tmpl + '{topic}/{partition}'

        zk_conn = KazooClient(self.zk_connect_str, timeout=10)
        zk_conn.start()

        # Stop the ZooKeeper client even if Kafka or ZooKeeper raise.
        try:
            for topic in self.topics:
                for partition in self.partitions:
                    node_path = zk_path_partition_tmpl.format(
                        topic=topic, partition=partition)
                    if not zk_conn.exists(node_path):
                        zk_conn.ensure_path(node_path)
                        zk_conn.set(node_path, b"0")

            consumer = KafkaConsumer(
                bootstrap_servers=[self.kafka_connect_str],
                group_id="my_consumer",
                auto_offset_reset='earliest',
                enable_auto_commit=False,
            )
            consumer.subscribe(self.topics)

            while not self._shutdown_event.is_set():
                response = consumer.poll(timeout_ms=500, max_records=10)
                zk_trans = zk_conn.transaction()
                for tp, records in response.items():
                    if not records:
                        continue
                    # Offset 0 is a valid position and must be stored too.
                    offset = max(record.offset for record in records)
                    # Use the same template as the node creation above so
                    # the topic is part of the path.
                    node_path = zk_path_partition_tmpl.format(
                        topic=tp.topic, partition=tp.partition)
                    # Store the decimal text of the offset as bytes.
                    zk_trans.set_data(node_path, str(offset).encode('ascii'))

                zk_trans.commit()
        finally:
            zk_conn.stop()
Beispiel #7
0
    def run(self):
        """Consume from Kafka and mirror the latest offsets into ZooKeeper.

        For every topic in ``TOPICS`` and partition in ``PARTITIONS`` a node
        ``/consumers/my_consumer/offsets/<topic>/<partition>`` is created
        (initialised to ``b"0"``) if missing. Then, until ``SHUTDOWN`` is
        set, each poll's highest offset per partition is written to the
        matching node in a single transaction.
        """
        zk_path_topic_tmpl = '/consumers/my_consumer/offsets/'
        zk_path_partition_tmpl = zk_path_topic_tmpl + '{topic}/{partition}'

        zk_conn = KazooClient(zk_instance['zk_connect_str'], timeout=10)
        zk_conn.start()

        # Stop the ZooKeeper client even if Kafka or ZooKeeper raise.
        try:
            for topic in TOPICS:
                for partition in PARTITIONS:
                    node_path = zk_path_partition_tmpl.format(
                        topic=topic, partition=partition)
                    if not zk_conn.exists(node_path):
                        zk_conn.ensure_path(node_path)
                        # Bytes literal works on Python 2 and 3 alike.
                        zk_conn.set(node_path, b"0")

            consumer = KafkaConsumer(
                bootstrap_servers=zk_instance['kafka_connect_str'],
                group_id="my_consumer",
                auto_offset_reset='earliest',
                enable_auto_commit=False)
            consumer.subscribe(TOPICS)

            while not SHUTDOWN.is_set():
                response = consumer.poll(timeout_ms=500, max_records=10)
                zk_trans = zk_conn.transaction()
                for tp, records in response.items():
                    if not records:
                        continue
                    # Offset 0 is a valid position and must be stored too.
                    offset = max(record.offset for record in records)
                    # Use the same template as the node creation above so
                    # the topic is part of the path.
                    node_path = zk_path_partition_tmpl.format(
                        topic=tp.topic, partition=tp.partition)
                    zk_trans.set_data(node_path, str(offset).encode('ascii'))

                zk_trans.commit()
        finally:
            zk_conn.stop()
Beispiel #8
0
class ZookClient(object):
    """Demo wrapper around a ``KazooClient`` connected with default settings."""

    def __init__(self, api_client=None):
        """Configure logging, then create and start the client.

        *api_client* is accepted but not used.
        """
        logging.basicConfig()
        # Create a client and start it
        self.zk = KazooClient()
        self.zk.start()

    def create_accounts_path(self, name, **kwargs):
        """Ensure ``/dso/<name>`` exists and store a fixed sample record."""
        path = "/dso/" + name
        self.zk.ensure_path(path)
        # Encoded explicitly: a bytes literal may not contain non-ASCII
        # characters on Python 3. The resulting UTF-8 bytes are unchanged.
        record = (u"id: 7b4235ca-00fb-4dca-ad3e-8b6e3662631a\n"
                  u"groupname: hr\n"
                  u"description: 人力资源")
        self.zk.set(path, record.encode("utf-8"))

    def create_accountinfo_path(self, account_id, **kwargs):
        """Ensure the fixed path ``/app/someservice``; *account_id* is unused."""
        self.zk.ensure_path("/app/someservice")

    def create_path(self, path, **kwargs):
        """Ensure *path* exists."""
        self.zk.ensure_path(path)

    def get_data(self, path):
        """Return whatever the client's ``get`` returns for *path*."""
        return self.zk.get(path)

    def test_tran(self):
        """Recreate ``/app22``, then replace it atomically in a transaction."""
        # NOTE(review): this first delete raises if /app22 does not exist.
        self.zk.delete("/app22")
        self.zk.create("/app22", b'{"12": "12"}')

        tran = self.zk.transaction()
        tran.delete("/app22")
        tran.create("/app22", b'{"22": "22"}')
        tran.commit()
        print("commit")

    def stop(self):
        """Stop the client."""
        # In the end, stop it
        self.zk.stop()
Beispiel #9
0
class ZookClient(object):
    """Demo wrapper around a ``KazooClient`` connected with default settings."""

    def __init__(self, api_client=None):
        """Configure logging, then create and start the client.

        *api_client* is accepted but not used.
        """
        logging.basicConfig()
        # Create a client and start it
        self.zk = KazooClient()
        self.zk.start()

    def create_accounts_path(self, name, **kwargs):
        """Ensure ``/dso/<name>`` exists and store a fixed sample record."""
        path = "/dso/" + name
        self.zk.ensure_path(path)
        # Encoded explicitly: a bytes literal may not contain non-ASCII
        # characters on Python 3. The resulting UTF-8 bytes are unchanged.
        self.zk.set(
            path,
            u"id: 7b4235ca-00fb-4dca-ad3e-8b6e3662631a\ngroupname: hr\ndescription: 人力资源"
            .encode("utf-8")
        )

    def create_accountinfo_path(self, account_id, **kwargs):
        """Ensure the fixed path ``/app/someservice``; *account_id* is unused."""
        self.zk.ensure_path("/app/someservice")

    def create_path(self, path, **kwargs):
        """Ensure *path* exists."""
        self.zk.ensure_path(path)

    def get_data(self, path):
        """Return whatever the client's ``get`` returns for *path*."""
        return self.zk.get(path)

    def test_tran(self):
        """Recreate ``/app22``, then replace it atomically in a transaction."""
        # NOTE(review): this first delete raises if /app22 does not exist.
        self.zk.delete("/app22")
        # Plain bytes literals: concatenating bytes and str fails on Python 3.
        self.zk.create("/app22", b'{"12": "12"}')

        tran = self.zk.transaction()
        tran.delete("/app22")
        tran.create("/app22", b'{"22": "22"}')
        tran.commit()
        print("commit")

    def stop(self):
        """Stop the client."""
        # In the end, stop it
        self.zk.stop()
Beispiel #10
0
    '''
    znodes = async_obj.get()
    try:
        children = async_obj.get()
        # すべての子ノードの名称を出力
        print('#####[print child znodes]#####')
        for child in children:
            print(child)
    except (ConnectionLossException, NoAuthException):
        print("ERROR!!!")
        sys.exit(1)


# Script entry point: exercises basic znode operations below ``root``.
# ``zk``, ``root`` and ``print_status`` are defined outside this excerpt.
if __name__ == '__main__':
    # Start a transaction
    # NOTE(review): ``tx`` is not used anywhere in the visible part of the
    # script; every call below goes through ``zk`` directly.
    tx = zk.transaction()
    ## Check the basic usage
    # Create the path
    zk.ensure_path(root)
    # Create the znode if it has not been created yet
    znode = root + '/sample_znode'
    if zk.exists(znode) is None:
        zk.create(znode, b'sample_data')
    print_status(znode)
    # Update the data
    zk.set(znode, b'updated_data')
    print_status(znode)
    # Add another node under root
    znode2 = root + '/sample_znode2'
    if zk.exists(znode2) is None:
        zk.create(znode2, b'sample_data2')
Beispiel #11
0
class ZkSyncManager(object):
    """ZooKeeper-backed lock manager.

    Offers two kinds of locks, both rooted at ``lock_path_prefix``:

    * session locks, taken through the ``lock`` context manager;
    * persistent locks, plain nodes created in one transaction by
      ``persistent_locks_acquire`` and removed by
      ``persistent_locks_release``. The node data identifies the holder.
    """

    RETRIES = 2
    LOCK_TIMEOUT = 3

    def __init__(self, host='127.0.0.1:2181', lock_path_prefix='/mastermind/locks/'):
        """Connect to *host*; connection errors are logged and re-raised."""
        self.client = KazooClient(host, timeout=3)
        logger.info('Connecting to zookeeper host {0}, '
            'lock_path_prefix: {1}'.format(host, lock_path_prefix))
        try:
            self.client.start()
        except Exception as e:
            logger.error(e)
            raise

        self._retry = KazooRetry(max_tries=self.RETRIES)

        self.lock_path_prefix = lock_path_prefix

    @contextmanager
    def lock(self, lockid, blocking=True, timeout=LOCK_TIMEOUT):
        """Hold the lock *lockid* for the duration of the ``with`` body.

        Raises:
            LockFailedError: the lock could not be acquired (not granted or
                timed out).

        Only acquisition problems are handled and logged here. Exceptions
        raised by the ``with`` body propagate unchanged, so they are not
        reported as lock failures.
        """
        lock = Lock(self.client, self.lock_path_prefix + lockid)
        try:
            try:
                acquired = lock.acquire(blocking=blocking, timeout=timeout)
            except LockTimeout:
                logger.info('Failed to acquire lock {0} due to timeout '
                    '({1} seconds)'.format(lockid, timeout))
                raise LockFailedError(lock_id=lockid)
            except Exception as e:
                logger.error('Failed to acquire lock {0}: {1}\n{2}'.format(
                    lockid, e, traceback.format_exc()))
                raise

            logger.debug('Lock {0} acquired: {1}'.format(lockid, acquired))
            if not acquired:
                raise LockFailedError(lock_id=lockid)
            yield
        finally:
            # Released on every path, including a failed acquisition.
            lock.release()

    def persistent_locks_acquire(self, locks, data=''):
        """Create all persistent *locks* atomically, tagged with *data*.

        Raises:
            LockAlreadyAcquiredError: at least one lock node already exists.
            LockError: retries were exhausted or another Kazoo error occurred.
        """
        try:
            retry = self._retry.copy()
            result = retry(self._inner_persistent_locks_acquire, locks=locks, data=data)
        except RetryFailedError:
            raise LockError
        except KazooException as e:
            logger.error('Failed to fetch persistent locks {0}: {1}\n{2}'.format(
                locks, e, traceback.format_exc()))
            raise LockError
        return result

    def _inner_persistent_locks_acquire(self, locks, data):
        """Single attempt of ``persistent_locks_acquire``."""
        ensured_paths = set()

        tr = self.client.transaction()
        for lockid in locks:
            path = self.lock_path_prefix + lockid
            # Parent nodes must exist before the transaction creates the
            # lock node; ensure each distinct parent only once.
            parts = path.rsplit('/', 1)
            if len(parts) == 2 and parts[0] not in ensured_paths:
                self.client.ensure_path(parts[0])
                ensured_paths.add(parts[0])
            tr.create(path, data)

        # commit() returns one result per queued operation, in order.
        result = tr.commit()
        failed = any(isinstance(res, ZookeeperError) for res in result)
        failed_locks = [lockid for lockid, res in zip(locks, result)
                        if isinstance(res, NodeExistsError)]

        if failed_locks:
            holders = []
            for f in failed_locks:
                # TODO: fetch all holders with 1 transaction request
                holders.append((f, self.client.get(self.lock_path_prefix + f)))
            # Prefer reporting a lock held by somebody else over one that
            # already carries our own data.
            foreign_holders = [(lock_id, resp) for lock_id, resp in holders
                               if resp[0] != data]
            failed_lock, holder_resp = (
                foreign_holders[0] if foreign_holders else holders[0])
            holder = holder_resp[0]
            holders_ids = list({resp[0] for _, resp in holders})
            logger.warning('Persistent lock {0} is already set by {1}'.format(failed_lock, holder))
            raise LockAlreadyAcquiredError(
                'Lock for {0} is already acquired by job {1}'.format(failed_lock, holder),
                lock_id=failed_lock, holder_id=holder, holders_ids=holders_ids)
        elif failed:
            logger.error('Failed to set persistent locks {0}, result: {1}'.format(
                locks, result))
            raise LockError

        return True

    def get_children_locks(self, lock_prefix):
        """List the lock ids below *lock_prefix*, each prefixed with it.

        Raises:
            LockError: retries were exhausted.
        """
        try:
            retry = self._retry.copy()
            result = retry(self.__inner_get_children_locks, lock_prefix)
        except RetryFailedError:
            raise LockError
        return result

    def __inner_get_children_locks(self, lock_prefix):
        """Single attempt of ``get_children_locks``."""
        full_path = self.lock_path_prefix + lock_prefix
        self.client.ensure_path(os.path.normpath(full_path))
        result = self.client.get_children(full_path)
        return ['{0}{1}'.format(lock_prefix, lock) for lock in result]

    def persistent_locks_release(self, locks, check=''):
        """Remove the persistent *locks*.

        When *check* is non-empty, each lock's data must equal it.

        Raises:
            InconsistentLockError: a lock holds data different from *check*.
            LockError: retries were exhausted or another Kazoo error occurred.
        """
        try:
            retry = self._retry.copy()
            result = retry(self.__inner_persistent_locks_release, locks=locks, check=check)
        except RetryFailedError:
            raise LockError
        except KazooException as e:
            logger.error('Failed to remove persistent locks {0}: {1}\n{2}'.format(
                locks, e, traceback.format_exc()))
            raise LockError
        return result

    def __inner_persistent_locks_release(self, locks, check):
        """Single attempt of ``persistent_locks_release``."""
        for lockid in locks:
            try:
                if check:
                    data = self.client.get(self.lock_path_prefix + lockid)
                    if data[0] != check:
                        logger.error('Lock {0} has inconsistent data: {1}, '
                            'expected {2}'.format(lockid, data[0], check))
                        raise InconsistentLockError(lock_id=lockid, holder_id=data[0])
                self.client.delete(self.lock_path_prefix + lockid)
            except NoNodeError:
                # A missing node means the lock is already released.
                logger.warning('Persistent lock {0} is already removed'.format(lockid))
        return True
Beispiel #12
0
    if state == KazooState.LOST:
    # Register somewhere that the session was lost
        print 'The session is lost: %s' % str(state)
    elif state == KazooState.SUSPENDED:
    # Handle being disconnected from Zookeeper
        print 'The session is suspended: %s' % str(state)
    else:
    # Handle being connected/reconnected to Zookeeper
        print 'The session is reconnected: %s' % str(state)

# Example script: connect to a local ZooKeeper server and run one transaction
# on the /dragonflow/table1 subtree.
zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()
# The listener is added after start(), so it only sees later state changes.
zk.add_listener(my_listener)

# Ensure a path, create if necessary
zk.ensure_path("/dragonflow/table1")

# Determine if a node exists
if zk.exists("/dragonflow/table1/key1"):

    # Do transaction
    # Creates key9 and overwrites key1 atomically.
    transaction = zk.transaction()
    transaction.create('/dragonflow/table1/key9', b"value9")
    transaction.set_data('/dragonflow/table1/key1', b"value8")
    results = transaction.commit()

    print(results)

# NOTE(review): key8 is never created in this script (the transaction touches
# key9 and key1), so this read presumably depends on pre-existing data and
# fails if the node is missing. Confirm the intended key.
result = zk.get('/dragonflow/table1/key8')
# Python 2 print statement; result[0] is the node data.
print result[0]
Beispiel #13
0
class ZookeeperClusterManager(ClusterManager):
    """
    A cluster manager that manages one cluster's state and configurations
    with a Zookeeper ensemble via kazoo.

    Below is the structure of the znodes:
        /needlestack
            /<CLUSTER_NAME_1>
                /live_nodes
                    /<HOSTPORT_1>
                    /<HOSTPORT_2>
                    /<HOSTPORT_3>
                    /<HOSTPORT_4>
                    ...
                /collections
                    /<COLLECTION_NAME_1>
                        /shards
                            /<SHARD_NAME_1>
                                /replicas
                                    /<HOSTPORT_2>
                                    /<HOSTPORT_4>
                            /<SHARD_NAME_2>
                                /replicas
                                    /<HOSTPORT_1>
                                    /<HOSTPORT_3>
                    /<COLLECTION_NAME_2>
                        ...
    """

    cluster_name: str
    hostport: str
    zk: KazooClient
    cache: TreeCache

    def __init__(self, cluster_name: str, hostport: str, hosts: List[str],
                 zookeeper_root: str):
        """Build the Kazoo client and tree cache; ``startup`` starts them.

        Args:
            cluster_name: Name of the cluster znode below *zookeeper_root*.
            hostport: Identifier of this node, used as its znode name.
            hosts: Zookeeper hosts passed to ``KazooClient``.
            zookeeper_root: Root znode that contains all clusters.
        """
        self.cluster_name = cluster_name
        self.hostport = hostport
        self.zookeeper_root = zookeeper_root
        self.zk = KazooClient(hosts=hosts)
        self.zk.add_listener(self.zk_listener)
        self.cache = TreeCache(self.zk, self.base_znode)

    @property
    def base_znode(self):
        """Root znode of this cluster."""
        return f"{self.zookeeper_root}/{self.cluster_name}"

    @property
    def live_nodes_znode(self):
        """Parent znode of the ephemeral znodes of live searcher nodes."""
        return f"{self.base_znode}/live_nodes"

    @property
    def this_node_znode(self):
        """Live-node znode of this process."""
        return f"{self.base_znode}/live_nodes/{self.hostport}"

    @property
    def collections_znode(self):
        """Parent znode of all collections."""
        return f"{self.base_znode}/collections"

    def collection_znode(self, collection_name: str) -> str:
        """Return the znode of one collection."""
        return f"{self.collections_znode}/{collection_name}"

    def shard_znode(self, collection_name: str,
                    shard_name: Optional[str] = None) -> str:
        """Return the ``shards`` znode of a collection, or one shard's znode."""
        znode = f"{self.collections_znode}/{collection_name}/shards"
        if shard_name:
            znode += "/" + shard_name
        return znode

    def replica_znode(self,
                      collection_name: str,
                      shard_name: str,
                      hostport: Optional[str] = None) -> str:
        """Return the ``replicas`` znode of a shard, or one replica's znode."""
        shard_znode = self.shard_znode(collection_name, shard_name)
        znode = f"{shard_znode}/replicas"
        if hostport:
            znode += "/" + hostport
        return znode

    def startup(self):
        """Start client and cache, install signal handlers, ensure base paths."""
        self.zk.start()
        self.cache.start()
        signal.signal(signal.SIGINT, self.signal_listener)
        signal.signal(signal.SIGTERM, self.signal_listener)
        self.zk.ensure_path(self.live_nodes_znode)
        self.zk.ensure_path(self.collections_znode)

    def shutdown(self):
        """Close the cache, then stop and close the client."""
        self.cache.close()
        self.zk.stop()
        self.zk.close()

    def cleanup(self):
        """Delete the replica znodes that belong to this node."""
        logger.info("Removing ZNodes via cleanup")
        transaction = self.zk.transaction()

        for collection in self.list_local_collections():
            for shard in collection.shards:
                for replica in shard.replicas:
                    # The hostport lives on the replica's ``node`` message,
                    # the same access path set_state and add_collections use.
                    znode = self.replica_znode(collection.name, shard.name,
                                               replica.node.hostport)
                    transaction.delete(znode)

        self.commit_transaction(transaction)

    def register_merger(self):
        """Nothing is registered for a merger."""
        pass

    def register_searcher(self):
        """Create this node's ephemeral live-node znode, with retries.

        Exhausted or interrupted retries are logged, not raised.
        """
        try:
            retrier = KazooRetry(max_tries=5, delay=1, backoff=2, max_delay=20)
            retrier(self.zk.create,
                    self.this_node_znode,
                    ephemeral=True,
                    makepath=True)
            logger.info(f"Created ephemeral ZNode {self.this_node_znode}")
        except kazoo.retry.RetryFailedError:
            logger.error(
                f"Max retries reached for creating ephemeral ZNode {self.this_node_znode}"
            )
        except kazoo.retry.InterruptedError:
            logger.error(
                f"Retries interrupted for creating ephemeral ZNode {self.this_node_znode}"
            )

    def set_state(self,
                  state,
                  collection_name=None,
                  shard_name=None,
                  hostport=None):
        """Set the state of the matching replicas in one transaction.

        Args:
            state: New ``Replica.State`` value.
            collection_name: Restrict to this collection; all when omitted.
            shard_name: Restrict to this shard; all when omitted.
            hostport: Restrict to replicas on this node; all when omitted.

        Returns:
            ``True`` if the transaction committed without errors.
        """
        transaction = self.zk.transaction()

        collections = [collection_name] if collection_name else None
        for collection in self._list_collections(collections,
                                                 hostport=hostport,
                                                 load_replica=True):
            logger.info(
                f"Set {collection.name}/shards ZNodes to {collections_pb2.Replica.State.Name(state)}"
            )
            for shard in collection.shards:
                # Honor the shard filter so other shards are left untouched.
                if shard_name and shard.name != shard_name:
                    continue
                for replica in shard.replicas:
                    znode = self.replica_znode(collection.name, shard.name,
                                               replica.node.hostport)
                    replica.state = state
                    transaction.set_data(znode, replica.SerializeToString())

        return self.commit_transaction(transaction)

    def set_local_state(self, state, collection_name=None, shard_name=None):
        """Like ``set_state``, restricted to replicas on this node."""
        return self.set_state(state, collection_name, shard_name,
                              self.hostport)

    def signal_listener(self, signum, frame):
        """Signal handler for SIGINT/SIGTERM: shut down."""
        self.shutdown()

    def zk_listener(self, state):
        """Log Kazoo connection state changes."""
        if state == KazooState.LOST:
            logger.warning("Connection to Zookeeper lost")
        elif state == KazooState.SUSPENDED:
            logger.warning("Connection to Zookeeper disconnected")
        else:
            logger.info("Connection to Zookeeper established")

    def add_collections(self, collections):
        """Configure a list of collections into Zookeeper

        All znodes are created in one transaction; replicas start in the
        ``BOOTING`` state. Returns *collections* on success, ``[]`` otherwise.
        """
        transaction = self.zk.transaction()

        for collection in collections:
            # Children are stored as their own znodes, so each parent is
            # serialized without its repeated child field.
            collection_copy = deepcopy(collection)
            collection_copy.ClearField("shards")
            collection_znode = self.collection_znode(collection.name)
            transaction.create(collection_znode,
                               collection_copy.SerializeToString())
            transaction.create(self.shard_znode(collection.name))
            for shard in collection.shards:
                shard_copy = deepcopy(shard)
                shard_copy.ClearField("replicas")
                shard_znode = self.shard_znode(collection.name, shard.name)
                transaction.create(shard_znode, shard_copy.SerializeToString())
                transaction.create(
                    self.replica_znode(collection.name, shard.name))
                for replica in shard.replicas:
                    replica_copy = deepcopy(replica)
                    replica_copy.state = collections_pb2.Replica.BOOTING
                    replica_znode = self.replica_znode(collection.name,
                                                       shard.name,
                                                       replica.node.hostport)
                    transaction.create(replica_znode,
                                       replica_copy.SerializeToString())

        if self.commit_transaction(transaction):
            return collections
        else:
            return []

    def delete_collections(self, collection_names):
        """Delete the named collections and all their znodes in one transaction.

        Returns *collection_names* on success, ``[]`` otherwise.
        """
        transaction = self.zk.transaction()

        for collection_name in collection_names:
            shards_znode = self.shard_znode(collection_name)
            for shard_name in self.zk.get_children(shards_znode):
                replicas_znode = self.replica_znode(collection_name,
                                                    shard_name)
                for replica_name in self.zk.get_children(replicas_znode):
                    replica_znode = self.replica_znode(collection_name,
                                                       shard_name,
                                                       replica_name)
                    transaction.delete(replica_znode)
                # Children are deleted before their parents.
                transaction.delete(replicas_znode)
                transaction.delete(
                    self.shard_znode(collection_name, shard_name))
            transaction.delete(shards_znode)
            transaction.delete(self.collection_znode(collection_name))

        if self.commit_transaction(transaction):
            return collection_names
        else:
            return []

    def list_nodes(self):
        """Return a ``Node`` message for every child of the live-nodes znode."""
        live_nodes = self.zk.get_children(self.live_nodes_znode)
        nodes = [collections_pb2.Node(hostport=node) for node in live_nodes]
        return nodes

    def list_collections(self, collection_names=None, include_state=True):
        """List collections, optionally restricted to *collection_names*."""
        return self._list_collections(collection_names,
                                      load_replica=include_state)

    def list_local_collections(self, include_state=True):
        """List the collections that have a replica on this node."""
        return self._list_collections(hostport=self.hostport,
                                      load_replica=include_state)

    def _list_collections(
        self,
        collection_names: Optional[List[str]] = None,
        hostport: Optional[str] = None,
        load_replica: Optional[bool] = True,
    ) -> List[collections_pb2.Collection]:
        """Read collections, shards and replicas directly from Zookeeper.

        Args:
            collection_names: Collections to read; all when omitted.
            hostport: Keep only replicas on this node; all when omitted.
            load_replica: Read each replica's stored data; otherwise an
                empty ``Replica`` message is used as a placeholder.

        Returns:
            Collections with at least one matching replica. Shards without
            a matching replica are left out.
        """
        collections = []

        collection_names = collection_names or self.zk.get_children(
            self.collections_znode)
        for collection_name in collection_names:

            shards = []
            shards_znode = self.shard_znode(collection_name)
            for shard_name in self.zk.get_children(shards_znode):

                replicas = []
                replicas_znode = self.replica_znode(collection_name,
                                                    shard_name)
                for replica_hostport in self.zk.get_children(replicas_znode):
                    if hostport == replica_hostport or hostport is None:
                        replica_znode = self.replica_znode(
                            collection_name, shard_name, replica_hostport)
                        if load_replica:
                            replica_data, _ = self.zk.get(replica_znode)
                            replica_proto = collections_pb2.Replica.FromString(
                                replica_data)
                        else:
                            replica_proto = collections_pb2.Replica()
                        replicas.append(replica_proto)

                if replicas:
                    shard_znode = self.shard_znode(collection_name, shard_name)
                    shard_data, _ = self.zk.get(shard_znode)
                    shard_proto = collections_pb2.Shard.FromString(shard_data)
                    shard_proto.replicas.extend(replicas)
                    shards.append(shard_proto)

            if shards:
                collection_znode = self.collection_znode(collection_name)
                collection_data, _ = self.zk.get(collection_znode)
                collection_proto = collections_pb2.Collection.FromString(
                    collection_data)
                collection_proto.shards.extend(shards)
                collections.append(collection_proto)

        return collections

    def get_searchers(self, collection_name, shard_names=None):
        """Return ``(shard_name, hostports)`` for each shard with an active replica.

        Reads from the tree cache. Shards without an active replica are
        logged and left out of the result.
        """
        if not shard_names:
            shards_znode = self.shard_znode(collection_name)
            shard_names = self.cache.get_children(shards_znode, [])

        shard_hostports = []
        for shard_name in shard_names:
            hostports = self._get_searchers_for_shard(collection_name,
                                                      shard_name,
                                                      active=True)
            if hostports:
                shard_hostports.append((shard_name, hostports))
            else:
                logger.error(
                    f"No active Searcher node for {collection_name}/{shard_name}."
                )

        return shard_hostports

    def _get_searchers_for_shard(self,
                                 collection_name: str,
                                 shard_name: str,
                                 active: bool = True) -> List[str]:
        """Return the replica hostports of a shard from the tree cache.

        With *active* set, only replicas whose cached state is ``ACTIVE``
        are returned.
        """
        replicas_znode = self.replica_znode(collection_name, shard_name)
        hostports = self.cache.get_children(replicas_znode, [])

        if active:
            active_hostports = []
            for hostport in hostports:
                replica_znode = self.replica_znode(collection_name, shard_name,
                                                   hostport)
                node = self.cache.get_data(replica_znode)
                if node:
                    replica = collections_pb2.Replica.FromString(node.data)
                    if replica.state == collections_pb2.Replica.ACTIVE:
                        active_hostports.append(hostport)
            hostports = active_hostports

        return hostports

    def commit_transaction(
            self, transaction: kazoo.client.TransactionRequest) -> bool:
        """Commit a transaction and log the first exception after rollbacks"""
        for result, operation in zip(transaction.commit(),
                                     transaction.operations):
            # Rolled-back operations are only a consequence of the real
            # failure, so skip them and report the actual error.
            if isinstance(result, kazoo.exceptions.RolledBackError):
                continue
            elif isinstance(result, Exception):
                logger.error(
                    f"{result.__class__.__name__} in Kazoo transaction: {operation}"
                )
                return False
        return True
Beispiel #14
0
class ZookClient(object):
    def __init__(self, zookeeper_host=None):
        """Connect to ZooKeeper and create the fixed part of the /dso tree.

        Starts a Kazoo client, defines the path-segment constants used by the
        other methods, ensures the base/accounts/mapping znodes exist and
        seeds the built-in "dso" entry under the Ip2Vm mapping.

        :param zookeeper_host: ZooKeeper connection string; defaults to
            "localhost:2181" when None.
        """
        logging.basicConfig()
        # Create a client and start it
        if zookeeper_host is None:
            zookeeper_host = "localhost:2181"
        self.zk = KazooClient(zookeeper_host)
        self.zk.start()
        self.tran = self.zk.transaction()

        # Path segments; every segment carries its trailing slash.
        self.CONST_BASE_PATH = "/dso/"
        self.CONST_ACCOUNTS_PATH = "accounts/"
        self.CONST_GROUPS_PATH = "groups/"
        self.CONST_USERS_PATH = "users/"
        self.CONST_HOSTS_PATH = "hosts/"
        self.CONST_SERVERS_PATH = "servers/"
        self.CONST_SERVICES_PATH = "services/"
        self.CONST_INSTANCES_PATH = "instances/"

        self.CONST_MAPPING_PATH = "Mapping/"
        self.CONST_IP2USER_PATH = "Ip2User/"
        self.CONST_MAC2USER_PATH = "Mac2User/"
        self.CONST_IP2VMPATH = "Ip2Vm/"
        self.CONST_USER2ACCOUNT_PATH = "User2Account/"
        self.CONST_AID2ANAME_PATH = "Aid2Aname/"
        self.CONST_VM_INFO_PATH = "VmInfo/"
        self.CONST_DSO_PATH = "dso/"
        # Service name -> value stored as "user" for that service's instances.
        self.CONST_HOST_MAPPING = dict(vrouter='fedora', dns='ubuntu', firewall='ubuntu', ipsecvpn='centos', vpc='ubuntu')

        # create constant base path
        self.zk.ensure_path(self.CONST_BASE_PATH)

        # create accounts path
        self.zk.ensure_path(self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH)

        # create mapping path
        mapping_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        self.zk.ensure_path(mapping_path)
        self.zk.ensure_path(mapping_path + self.CONST_IP2USER_PATH)
        self.zk.ensure_path(mapping_path + self.CONST_IP2VMPATH)
        self.zk.ensure_path(mapping_path + self.CONST_MAC2USER_PATH)

        # create dso path in mapping ip2vm for jinzhou use
        dso_path = mapping_path + self.CONST_IP2VMPATH + self.CONST_DSO_PATH
        self.zk.ensure_path(dso_path)
        # Bytes literals keep the payload valid on both Python 2 and 3.
        self.zk.set(dso_path, b'{"accountname": "dso"}')

        # The IP node lives below dso_path; build the path by concatenation
        # (a quoted expression here would be used verbatim as the znode path).
        dso_ip_path = dso_path + "172.19.0.101"
        self.zk.ensure_path(dso_ip_path)
        self.zk.set(dso_ip_path, b'{"hostname": "dso-server", "manageip": "172.19.0.101", "servicename": "dso"}')

    def create_account_path(self, account_info):
        """
            Ensure the znode of one account exists and store its id and
            name there as UTF-8 encoded JSON.
            :param account_info: object exposing ``id`` and ``account_name``
        """
        accounts_root = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        # The shared parent is (re)created first when it is missing.
        if not self.zk.exists(accounts_root):
            self.zk.ensure_path(accounts_root)

        account_node = accounts_root + account_info.id
        record = dict(id=account_info.id,
                      name=account_info.account_name)
        self.zk.ensure_path(account_node)
        self.zk.set(account_node, json.dumps(record).encode('utf8'))

    def create_accountinfo_path(self, account_info):
        """
            Build the groups and services subtrees below one account znode.
            A missing account znode is only reported on stdout; processing
            continues regardless.
            :param account_info: object exposing ``id``, ``groups`` and
                ``services``
        """
        account_root = (self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
                        + account_info.id + "/")
        if not self.zk.exists(account_root):
            print("Path" + account_root + " does not exist.")

        # groups subtree: one child per group
        groups_root = account_root + self.CONST_GROUPS_PATH
        self.zk.ensure_path(groups_root)
        for group in account_info.groups:
            self.create_usergroup_path(groups_root, group)

        # services subtree: summary on the parent, then one child per service
        services_root = account_root + self.CONST_SERVICES_PATH
        self.zk.ensure_path(services_root)
        self.gen_services_summay(services_root, account_info.services)
        for service in account_info.services:
            self.create_service_path(services_root, service)

    def create_service_path(self, parent_path, service):
        """
            Create the znode of one service (JSON with id and servicename)
            plus an "instances/" child holding one znode per instance.
            Does nothing when the service or its name is None.
            :param parent_path: services path, ending with "/"
            :param service: object exposing ``id``, ``servicename`` and
                ``instances``
        """
        if service is None or service.servicename is None:
            return

        name = service.servicename
        service_node = parent_path + name + "/"
        record = dict(id=service.id, servicename=name)
        self.zk.ensure_path(service_node)
        self.zk.set(service_node, json.dumps(record).encode('utf8'))

        instances_root = service_node + self.CONST_INSTANCES_PATH
        self.zk.ensure_path(instances_root)
        for instance in service.instances:
            self.create_instance_path(instances_root, instance, service.servicename)

    def create_instance_path(self, parent_path, instance, service_name):
        """
            Create the znode of one service instance and store its network
            attributes as UTF-8 JSON. The "user" field is looked up in
            CONST_HOST_MAPPING by service name (unknown names raise KeyError).
            :param parent_path: instances path, ending with "/"
            :param instance: object exposing id, mac, manageip, publicip,
                publicgateway, publicnetmask, serviceip and status
            :param service_name: key into CONST_HOST_MAPPING
            :return: the instance znode path, or None when instance.id is None
        """
        if instance.id is None:
            print("instance.id is None, maybe is vpc, and vpc is fail")
            return

        node = parent_path + instance.id
        record = dict(id=instance.id,
                      mac=instance.mac,
                      manageip=instance.manageip,
                      publicip=instance.publicip,
                      publicgateway=instance.publicgateway,
                      publicnetmask=instance.publicnetmask,
                      serviceip=instance.serviceip,
                      status=instance.status,
                      user=self.CONST_HOST_MAPPING[service_name])

        self.zk.ensure_path(node)
        self.zk.set(node, json.dumps(record).encode('utf8'))
        return node

    def create_usergroup_path(self, parent_path, group):
        """
            Create the znode of one user group (JSON with id, groupname and
            description) and fill its "users/" and "servers/" children.
            Does nothing when the group or its name is None.
            :param parent_path: groups path, ending with "/"
            :param group: object exposing id, groupname, description, users
                and servers
        """
        if group is None or group.groupname is None:
            return

        group_node = parent_path + group.groupname + "/"
        record = dict(
                id=group.id,
                groupname=group.groupname,
                description=group.description
        )
        self.zk.ensure_path(group_node)
        self.zk.set(group_node, json.dumps(record).encode('utf8'))

        # users of the group
        users_root = group_node + self.CONST_USERS_PATH
        self.zk.ensure_path(users_root)
        for member in group.users:
            self.create_user_path(users_root, member)

        # servers/hosts of the group
        servers_root = group_node + self.CONST_SERVERS_PATH
        self.zk.ensure_path(servers_root)
        for machine in group.servers:
            self.create_host_path(servers_root, machine)

    def create_user_path(self, parent_path, user):
        """
            Create the znode of one user (JSON with id, name and email) and
            one child below "hosts/" for each of the user's hosts.
            Prints a notice and returns when the user or its name is None.
            :param parent_path: users path, ending with "/"
            :param user: object exposing id, name, email and hosts
        """
        if user is None or user.name is None:
            print("user is None")
            return

        user_node = parent_path + user.name + "/"
        record = dict(
                id=user.id,
                name=user.name,
                email=user.email)
        self.zk.ensure_path(user_node)
        self.zk.set(user_node, json.dumps(record).encode('utf8'))

        hosts_root = user_node + self.CONST_HOSTS_PATH
        self.zk.ensure_path(hosts_root)
        for machine in user.hosts:
            self.create_host_path(hosts_root, machine)

    def create_host_path(self, parent_path, host):
        """
            Create a znode named after the host's MAC address and store the
            host's ip, mac, host_name and domain as UTF-8 JSON.
            :param parent_path: parent path, ending with "/"
            :param host: object exposing ip, mac, host_name and domain
            :return: the host znode path, or None when the host or its MAC
                is None
        """
        if host is None or host.mac is None:
            return

        node = parent_path + host.mac
        record = dict(
                ip=host.ip,
                mac=host.mac,
                host_name=host.host_name,
                domain=host.domain)

        self.zk.ensure_path(node)
        self.zk.set(node, json.dumps(record).encode('utf8'))
        return node

    def delete_dso_path(self):
        """Recursively delete "/dso" and recreate it as an empty path."""
        dso_root = "/dso"
        self.zk.delete(dso_root, recursive=True)
        self.zk.ensure_path(dso_root)

    def delete_account_path(self, account_id):
        """
            Recursively delete the znode of one account.
            :param account_id: account id used as the znode name
        """
        accounts_root = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        self.zk.delete(accounts_root + account_id, recursive=True)

    def delete_account_mapping_path(self, account_id):
        """
            Recursively delete the account's subtrees under the Ip2Vm,
            Ip2User and Mac2User mappings, in that order.
            :param account_id: account id used as the znode name
        """
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        sections = (self.CONST_IP2VMPATH,
                    self.CONST_IP2USER_PATH,
                    self.CONST_MAC2USER_PATH)
        # Build every path before deleting anything.
        doomed = [mapping_root + section + account_id for section in sections]
        for path in doomed:
            self.zk.delete(path, recursive=True)

    @staticmethod
    def check_none(data):
        return "None" if data is None else data

    def create_user2account_path(self, user2account):
        """
            Store the user-to-account mapping as UTF-8 JSON on the
            User2Account mapping znode.
            :param user2account: JSON-serialisable mapping
        """
        node = (self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
                + self.CONST_USER2ACCOUNT_PATH)
        self.zk.ensure_path(node)
        self.zk.set(node, json.dumps(user2account).encode('utf8'))

    def create_aid2aname_path(self, aid2aname):
        """
            Store the account-id-to-name mapping as UTF-8 JSON on the
            Aid2Aname mapping znode, creating the mapping parent if needed.
            :param aid2aname: JSON-serialisable mapping
        """
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        if not self.zk.exists(mapping_root):
            self.zk.ensure_path(mapping_root)

        node = mapping_root + self.CONST_AID2ANAME_PATH
        self.zk.ensure_path(node)
        self.zk.set(node, json.dumps(aid2aname).encode('utf8'))

    def create_ip2user_path(self, account_info, ip2user):
        """
            Replace the account's Ip2User subtree in a single transaction:
            queue deletion of the existing children and account znode, then
            recreate the account znode and one child per IP.
            The commit result is not inspected.
            :param account_info: object exposing ``id`` and ``account_name``
            :param ip2user: dict mapping IP -> JSON-serialisable user data;
                a None key is reported on stdout and skipped
        """
        mapping_root = (self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
                        + self.CONST_IP2USER_PATH)
        account_node = mapping_root + account_info.id + "/"
        batch = self.zk.transaction()

        # Children must be removed before their parent.
        if self.zk.exists(account_node):
            for child in self.zk.get_children(account_node):
                batch.delete(account_node + child)
            batch.delete(account_node)

        header = {"account_name": account_info.account_name}
        batch.create(account_node, json.dumps(header).encode('utf8'))

        for ip, owner in ip2user.items():
            if ip is None:
                print("Ip is None")
                continue
            batch.create(account_node + ip, json.dumps(owner).encode('utf8'))
        batch.commit()

    def create_mac2user_path(self, account_info, mac2user):
        """
            Replace the account's Mac2User subtree in a single transaction:
            queue deletion of the existing children and account znode, then
            recreate the account znode and one child per MAC address.
            The commit result is not inspected.
            :param account_info: object exposing ``id`` and ``account_name``
            :param mac2user: dict mapping MAC -> JSON-serialisable user data;
                a None key is reported on stdout and skipped
        """
        mapping_root = (self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
                        + self.CONST_MAC2USER_PATH)
        account_node = mapping_root + account_info.id + "/"
        batch = self.zk.transaction()

        account_present = self.zk.exists(account_node)
        if account_present:
            # Children must be removed before their parent.
            stale_children = self.zk.get_children(account_node)
            for child in stale_children:
                batch.delete(account_node + child)
            batch.delete(account_node)

        header = {"account_name": account_info.account_name}
        batch.create(account_node, json.dumps(header).encode('utf8'))

        for mac, owner in mac2user.items():
            if mac is None:
                print("mac is None")
                continue
            batch.create(account_node + mac, json.dumps(owner).encode('utf8'))
        batch.commit()

    def create_ip2vm_path(self, account_info):
        """
            Synchronise the account's Ip2Vm subtree with the instances
            returned by get_instances(), using one transaction:
            children that no longer correspond to an instance are deleted,
            new instances are created, and existing ones are rewritten only
            when same_instance() reports a difference.
            The commit result is not inspected.
            :param account_info: object exposing ``id``, ``account_name``
                and ``services``
        """
        wanted = self.get_instances(account_info)
        account_node = (self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
                        + self.CONST_IP2VMPATH + account_info.id + "/")
        account_json = json.dumps(dict(accountname=account_info.account_name))
        batch = self.zk.transaction()

        # Instances already stored in ZooKeeper, keyed by child name.
        stored = {}
        if not self.zk.exists(account_node):
            batch.create(account_node, account_json.encode('utf8'))
        else:
            # NOTE(review): the stored value is compared with the un-encoded
            # JSON string, exactly as before - confirm this is intended.
            if self.zk.get(account_node)[0] != account_json:
                batch.set_data(account_node, account_json.encode('utf8'))

            for child in self.zk.get_children(account_node):
                if wanted.get(child) is None:
                    batch.delete(account_node + child)
                else:
                    stored[child] = json.loads(
                        self.zk.get(account_node + child)[0])

        for ip, vm in wanted.items():
            known = stored.get(ip)
            if known is None:
                batch.create(account_node + ip, json.dumps(vm).encode('utf8'))
            elif not self.same_instance(vm, known):
                batch.set_data(account_node + ip, json.dumps(vm).encode('utf8'))
        batch.commit()

    def sync_mapping_pre_account(self, account_info, vpn_clients):
        """
            Rebuild the Ip2User, Mac2User and Ip2Vm mappings of one account.
            Hosts without an IP contribute to neither the IP nor the MAC
            mapping. VPN clients are added to the IP mapping with any
            "/suffix" stripped from their address.
            :param account_info: object exposing id, groups (with users and
                their hosts) and whatever create_ip2vm_path needs
            :param vpn_clients: iterable of objects exposing user_name,
                group and ip
        """
        user2account, ip2user, mac2user = {}, {}, {}

        for group in account_info.groups:
            for user in group.users:
                user2account[user.id] = account_info.id
                for host in user.hosts:
                    owner = dict(userid=user.id, username=user.name,
                                 useremail=user.email, groupname=group.groupname)
                    if host.ip is None:
                        continue
                    ip2user[host.ip] = owner
                    mac2user[host.mac] = owner

        # add vpn info
        for vpn in vpn_clients:
            vpn_owner = dict(userid='vpn_user', username=vpn.user_name, useremail='', groupname=vpn.group)
            if vpn.ip is None:
                continue
            address = vpn.ip
            if "/" in address:
                address = address.split("/")[0]
            ip2user[address] = vpn_owner

        self.create_ip2user_path(account_info, ip2user)
        self.create_mac2user_path(account_info, mac2user)

        # user2account is collected but not written (not used now)
        # self.create_user2account_path(user2account)

        self.create_ip2vm_path(account_info)

    def gen_vm_path(self, vm_info):
        """
            Write one znode per VM under the VmInfo mapping. The znode name
            is the VM's manage_ip with "/" replaced by "-"; the data is JSON
            with the VM's id, type and status.
            :param vm_info: iterable of objects exposing manage_ip, id, type
                and status
        """
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        if not self.zk.exists(mapping_root):
            self.zk.ensure_path(mapping_root)

        vms_root = mapping_root + self.CONST_VM_INFO_PATH
        self.zk.ensure_path(vms_root)
        for vm in vm_info:
            node = vms_root + vm.manage_ip.replace('/', '-')
            record = dict(id=vm.id, type=vm.type, status=vm.status)
            self.zk.ensure_path(node)
            self.zk.set(node, json.dumps(record).encode('utf8'))

    def create_mapping_accounts(self, accounts):
        """
            Collect account id -> account name for all accounts and store
            the result through create_aid2aname_path().
            :param accounts: iterable of objects exposing ``id`` and
                ``account_name``
        """
        names_by_id = {entry.id: entry.account_name for entry in accounts}
        self.create_aid2aname_path(names_by_id)

    def gen_services_summay(self, path, services):
        """
            Store an HTML summary of all services on ``path``.
            For every service, each instance is rendered as a two-column
            HTML table (manageip, publicip, publicgateway, publicnetmask,
            serviceip, status, user); the tables are joined with newlines
            and the resulting {servicename: html} dict is written as JSON.
            :param path: znode receiving the summary
            :param services: iterable of objects exposing ``servicename``
                and ``instances``
        """
        def html_row(label, value):
            # One table row; None is rendered as the text "None".
            return ("<tr><td>" + label + "</td><td>"
                    + self.check_none(value) + "</td></tr>")

        summary = {}
        for service in services:
            tables = ""
            for instance in service.instances:
                table = "<table class=\"table\">" \
                    + html_row("manageip", instance.manageip) \
                    + html_row("publicip", instance.publicip) \
                    + html_row("publicgateway", instance.publicgateway) \
                    + html_row("publicnetmask", instance.publicnetmask) \
                    + html_row("serviceip", instance.serviceip) \
                    + html_row("status", instance.status) \
                    + html_row("user",
                               self.CONST_HOST_MAPPING[service.servicename]) \
                    + "</table>"
                tables += table + "\n"
            summary[service.servicename] = tables

        self.zk.set(path, json.dumps(summary).encode('utf8'))



    def get_all_account(self):
        """
            :return: the child names of the accounts znode
        """
        accounts_root = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        return self.zk.get_children(accounts_root)

    def get_instances(self, account_info):
        instances_dict = {}
        for service_item in account_info.services:
            service_name = service_item.servicename
            for instance in service_item.instances:
                if instance.manageip is None:
                    continue
                manageip = instance.manageip
                if manageip is None:
                    continue
                if instance.manageip.__contains__('/'):
                    manageip =manageip[: manageip.find('/')]

                host_name = account_info.id + '-' + service_name

                instance_data = dict(id=instance.id,
                                     mac=instance.mac,
                                     manageip=instance.manageip,
                                     publicip=instance.publicip,
                                     publicgateway=instance.publicgateway,
                                     publicnetmask=instance.publicnetmask,
                                     serviceip=instance.serviceip,
                                     status=instance.status,
                                     servicename=service_name,
                                     hostname=host_name
                                     )
                instances_dict[manageip] = instance_data
        return instances_dict

    def commit(self):
        """Commit the transaction object created in __init__ (self.tran)."""
        pending = self.tran
        pending.commit()

    def stopZooK(self):
        """Stop the ZooKeeper client; call this once all work is done."""
        client = self.zk
        client.stop()

    def same_instance(self, vm0, vm1):
        return vm0.get('id') == vm1.get('id') and vm0.get('hostname') == vm1.get('hostname')
Beispiel #15
0
class ZkOperation(object):
    def __init__(self, zk_hosts, zk_root):
        """Create the Kazoo client (not yet started) and empty bookkeeping.

        Args:
            zk_hosts: connection string handed to ``KazooClient``.
            zk_root: root path that the node paths of this class are
                built from.
        """
        # The client is only constructed here; start() opens the session.
        self.zk = KazooClient(zk_hosts)
        self.root = zk_root
        self.event = threading.Event()
        self.tasks = set()

    def start(self):
        """Start the client, authenticate, and make sure the root path exists.

        The root path is only created when the client reports that it is
        connected after starting.
        """
        # No guard is needed here: a test on the bound method
        # ``self.zk.exists`` is always true and protects nothing.
        self.zk.start()
        # NOTE(review): digest credentials are hard-coded in source.
        self.zk.add_auth('digest', 'publish:publish')
        if self.zk.connected:
            self.zk.ensure_path(self.root)

    def is_job_exist(self, job_id):
        if job_id == '':
            raise Exception('job_id is ""')
        node = self.root + '/jobs/' + job_id
        return self.zk.exists(node)

    def check_task_status(self, path):
        if path == '':
            raise Exception('path is ""')
        node = self.root + path
        data, _ = self.zk.get(node)
        return data['Status']

    def _is_exist(self, node):
        if self.zk.connected and self.zk.exists(node):
            if self.zk.exists(node):
                return True
            else:
                return False
        else:
            logger.error('zk not connected or node is exists')
            return False

    def _create_node(self, node, value=None):
        if value is None:
            value = ''
        value = json.dumps(value)
        if self.zk.connected and not self.zk.exists(node):
            self.zk.create(node, makepath=True, value=value.encode())
            return True
        else:
            logger.error('zk not connected or node is exists')
            return False

    def _update_node(self, node, value):
        if self.zk.connected and self.zk.exists(node):
            tx = self.zk.transaction()
            tx.set_data(node, json.dumps(value).encode())
            tx.commit()
            return True
        else:
            logger.error(
                'update node failed: zk not connected or node is not exists')
            return False

    def _get_node(self, node):
        if self.zk.connected and self.zk.exists(node):
            node_value, _ = self.zk.get(node)
            if node_value:
                return json.loads(node_value.decode())
            else:
                return {}
        else:
            logger.error('zk not connected or node is exists')
            return False

    def _delete_node(self, node):
        if self.zk.connected:
            if not self.zk.exists(node):
                return True
            else:
                self.zk.delete(node, recursive=True)
                return True
        else:
            logger.error('zk not connected')
            return False

    # is exist
    def is_exist_signal(self, job_id):
        node = '/{}/signal/{}'.format(self.root, job_id)
        return self._is_exist(node)

    # CREATE
    def create_new_job(self, job_id, job_value=None):
        if job_value is None:
            job_value = ''
        if job_id != '':
            node = self.root + '/jobs/' + job_id
            ret = self._create_node(node, job_value)
            return ret
        else:
            logger.error('job_id is null')
            return False

    def create_new_target(self, job_id, target, target_value):
        node = '/{}/jobs/{}/targets/{}'.format(self.root, job_id, target)
        ret = self._create_node(node, target_value)
        return ret

    def create_new_task(self, job_id, target, task):
        node = '/{}/jobs/{}/targets/{}/tasks/{}'.format(
            self.root, job_id, target, task['task_id'])
        ret = self._create_node(node, task)
        return ret

    def create_job_signal(self, job_id):
        node = '/{}/signal/{}'.format(self.root, job_id)
        ret = self._create_node(node, uuid.uuid4().hex)
        return ret

    # GET
    def get_job_info(self, job_id):
        job_node = '{}/jobs/{}'.format(self.root, job_id)
        job_value, _ = self.zk.get(job_node)
        job_info = json.loads(job_value.decode())
        return job_info

    def get_target_info(self, job_id, target):
        target_node = '{}/jobs/{}/targets/{}'.format(self.root, job_id, target)
        target_value, _ = self.zk.get(target_node)
        target_info = json.loads(target_value.decode())
        return target_info

    def get_task_info(self, job_id, target, task_id):
        task_node = '{}/jobs/{}/targets/{}/tasks/{}'.format(
            self.root, job_id, target, task_id)
        task_value, _ = self.zk.get(task_node)
        task_info = json.loads(task_value.decode())
        return task_info

    # UPDATE
    def update_job_status(self, job_id, task):
        if job_id != '' and task is not None:
            node = self.root + '/signal/' + job_id
        else:
            raise Exception('job_id is ""')
        if self.zk.connected and self.is_job_exist(job_id):
            tx = self.zk.transaction()
            tx.set_data(node, task.encode())
            tx.commit()

    def handler_task(self, job_id, task_id, task_name, task_message, status):
        # 为不必传回target, 遍历任务节点
        if not self.is_job_exist(job_id):
            logger.error("can not find this jobid: {}".format(job_id))
            return False
        job_node = "{}/jobs/{}/targets".format(self.root, job_id)
        for target in self.zk.get_children(job_node):
            target_node = "{}/{}/tasks".format(job_node, target)
            for task in self.zk.get_children(target_node):
                if task == task_id:
                    task_node = "{}/{}".format(target_node, task)
                    task_value, _ = self.zk.get(task_node)
                    new_task_value = json.loads(task_value.decode())
                    new_task_value['status'] = status
                    tx = self.zk.transaction()
                    tx.set_data(task_node, json.dumps(new_task_value).encode())
                    tx.commit()
                    task_value, _ = self.zk.get(task_node)
                    return True
        logger.error("can not find this taskid: {} in {}".format(
            task_id, job_id))
        return False

    def get_target_by_taskid(self, job_id, task_id):
        if self.is_job_exist(job_id):
            node = "{}/jobs/{}/targets".format(self.root, job_id)
            for target in self.zk.get_children(node):
                path = '{}/{}/tasks'.format(node, target)
                for taskid in self.zk.get_children(path):
                    if taskid == task_id:
                        return target
            return False
        else:
            logger.error("job is not exist: job_id={}".format(job_id))

    def send_signal(self, job_id):
        """Overwrite the job's signal node with 16 fresh random UUID bytes.

        The write is done in a transaction whose result is not inspected.
        """
        signal_node = '{}/signal/{}'.format(self.root, job_id)
        logger.info("send singal: {}".format(job_id))
        txn = self.zk.transaction()
        token = uuid.uuid4().bytes
        txn.set_data(signal_node, token)
        txn.commit()

    # DELETE
    def delete_job(self, job_id):
        """Log and recursively delete the job node ``<root>/jobs/<job_id>``."""
        job_node = "{}/jobs/{}".format(self.root, job_id)
        logger.info("delete job: job_id={}".format(job_id))
        # The helper's success flag is intentionally not propagated.
        self._delete_node(job_node)

    def delete_signal(self, job_id):
        """Log and recursively delete the signal node
        ``<root>/signal/<job_id>``."""
        signal_node = '{}/signal/{}'.format(self.root, job_id)
        logger.info("delete singal: {}".format(job_id))
        # The helper's success flag is intentionally not propagated.
        self._delete_node(signal_node)

    def delete_target(self, job_id, target):
        """Log and recursively delete
        ``<root>/jobs/<job_id>/targets/<target>``."""
        node = '{}/jobs/{}/targets/{}'.format(self.root, job_id, target)
        message = "delete target: job_id={}, target={}".format(job_id, target)
        logger.info(message)
        self._delete_node(node)

    def delete_task(self, job_id, target, task_id):
        """Log and recursively delete
        ``<root>/jobs/<job_id>/targets/<target>/tasks/<task_id>``."""
        node = '{}/jobs/{}/targets/{}/tasks/{}'.format(
            self.root, job_id, target, task_id)
        message = "delete task: job_id ={}, target={}, task_id={}".format(
            job_id, target, task_id)
        logger.info(message)
        self._delete_node(node)

#################################
# CALLBACK
## exist CALLBACK

    def is_exist_callback(self, callback_node):
        node = "{}/callback/{}".format(self.root, callback_node)
        if self.zk.exists(node):
            return True
        else:
            return False

    ## INIT CALLBACK
    def init_callback_by_jobid(self, job_id):
        """Create the job-level callback node ``<root>/callback/<job_id>``.

        The node holds the job callback level plus an info record with the
        job id, the initial job status and an empty message.

        Returns:
            The result of ``_create_node``.
        """
        node = "{}/callback/{}".format(self.root, job_id)
        info = {
            "job_id": job_id,
            "status": JobStatus.init.value,
            "messages": ""
        }
        payload = {"callback_level": CallbackLevel.job.value,
                   "callback_info": info}
        return self._create_node(node, payload)

    def init_callback_by_target(self, job_id, target):
        """Create the target-level callback node
        ``<root>/callback/<job_id>_<target>``.

        The node holds the target callback level plus an info record with
        the job id, the target, the initial target status and an empty
        message.

        Returns:
            The result of ``_create_node``.
        """
        node = "{}/callback/{}".format(self.root, job_id + "_" + target)
        info = {
            "job_id": job_id,
            "target": target,
            "status": TargetStatus.init.value,
            "messages": ""
        }
        payload = {"callback_level": CallbackLevel.target.value,
                   "callback_info": info}
        return self._create_node(node, payload)

    def init_callback_by_taskid(self, job_id, target, task_id, task_name):
        """Create the task-level callback node ``<root>/callback/<task_id>``.

        The node holds the task callback level plus an info record with the
        job id, target, task name, an initial status (taken from
        ``JobStatus.init``) and an empty message.

        Returns:
            The result of ``_create_node``.
        """
        node = "{}/callback/{}".format(self.root, task_id)
        info = {
            "job_id": job_id,
            "target": target,
            "task_name": task_name,
            "status": JobStatus.init.value,
            "messages": "",
        }
        payload = {"callback_level": CallbackLevel.task.value,
                   "callback_info": info}
        return self._create_node(node, payload)

    ## GET CALLBACK
    def get_callback_info(self, callback):
        node = "{}/callback/{}".format(self.root, callback)
        if self.zk.exists(node):
            node_value = self._get_node(node)
            return node_value
        else:
            return False

    ## UPDATE CALLBACK
    def update_callback_by_jobid(self, job_id, status, messages=None):
        node = "{}/callback/{}".format(self.root, job_id)
        if not self.zk.exists(node):
            return False
        node_value = self._get_node(node)
        node_value["callback_info"]["status"] = status
        if messages is not None:
            node_value["callback_info"]["messages"] = messages
        ret = self._update_node(node, node_value)
        return ret

    def update_callback_by_target(self, job_id, target, status, messages=None):
        node = "{}/callback/{}".format(self.root, job_id + "_" + target)
        if not self.zk.exists(node):
            return False
        node_value = self._get_node(node)
        node_value["callback_info"]["status"] = status
        if messages is not None:
            node_value["callback_info"]["messages"] = messages
        ret = self._update_node(node, node_value)
        return ret

    def update_callback_by_taskid(self, job_id, taskid, status, messages=None):
        node = "{}/callback/{}".format(self.root, taskid)
        if not self.zk.exists(node):
            return False
        node_value = self._get_node(node)
        node_value["callback_info"]["status"] = status
        if messages is not None:
            node_value["callback_info"]["messages"] = messages
        ret = self._update_node(node, node_value)
        return ret

    ## DELETE CALLBACK
    def delete_callback_node(self, callback):
        """Delete ``<root>/callback/<callback>`` and log the outcome.

        Returns:
            The result of ``_delete_node``.
        """
        node = "{}/callback/{}".format(self.root, callback)
        deleted = self._delete_node(node)
        if not deleted:
            logger.error("delete callback node fail: callback={}".format(node))
        else:
            logger.info(
                "delete callback node success: callback={}".format(node))
        return deleted
Beispiel #16
0
class USSMetadataManager(object):
    """Interfaces with the locking system to get, put, and delete USS metadata.

  Metadata gets/stores/deletes the USS information for a particular grid,
  including current version number, a list of USSs with active operations,
  and the endpoints to get that information. Locking is assured through a
  snapshot token received when getting, and used when putting.
  """
    def __init__(self, connectionstring=DEFAULT_CONNECTION, testgroupid=None):
        """Connect to zookeeper, switching to test mode first when requested.

        Args:
          connectionstring:
            Zookeeper connection string - server:port,server:port,...
            A falsy value falls back to DEFAULT_CONNECTION.
          testgroupid:
            ID to use if in test mode, none for normal mode. When given, the
            test data stored under that ID is deleted right after connecting.
        Raises:
          ValueError: the connection string contains a character from
            BAD_CHARACTER_CHECK (logged, then re-raised).
          KazooTimeoutError: logged and re-raised when it escapes the
            connection attempt.
        """
        if testgroupid:
            self.set_testmode(testgroupid)
        connectionstring = connectionstring or DEFAULT_CONNECTION
        log.debug(
            'Creating metadata manager object and connecting to zookeeper...')
        try:
            forbidden = set(BAD_CHARACTER_CHECK).intersection(
                set(connectionstring))
            if forbidden:
                # Handled by the ValueError clause below: log, then re-raise.
                raise ValueError
            self.zk = KazooClient(hosts=connectionstring,
                                  timeout=CONNECTION_TIMEOUT)
            self.zk.add_listener(self.zookeeper_connection_listener)
            self.zk.start()
            if testgroupid:
                self.delete_testdata(testgroupid)
        except KazooTimeoutError:
            log.error(
                'Unable to connect to zookeeper using %s connection string...',
                connectionstring)
            raise
        except ValueError:
            log.error('Connection string %s seems invalid...',
                      connectionstring)
            raise

    def __del__(self):
        """Stop the zookeeper client when the manager is garbage collected.

        ``__init__`` can raise before ``self.zk`` is assigned (for example on
        an invalid connection string) and ``__del__`` still runs for such a
        half-built instance, so the client is looked up defensively instead
        of failing with AttributeError during finalization.
        """
        log.debug(
            'Destroying metadata manager object and disconnecting from zk...')
        zk = getattr(self, 'zk', None)
        if zk is not None:
            zk.stop()

    def set_verbose(self):
        """Switch the module logger ``log`` to DEBUG level."""
        log.setLevel(logging.DEBUG)

    def set_testmode(self, testgroupid='UNDEFINED_TESTER'):
        """Switch the module to test mode for the given test ID.

        Rebinds the module-level GRID_PATH and CONNECTION_TIMEOUT, so the
        change affects every manager in the process and cannot be undone.

        Args:
          testgroupid: ID to use if in test mode, none for normal mode
        """
        global GRID_PATH, CONNECTION_TIMEOUT
        # Adjust parameters specifically for the test
        test_path = TEST_BASE_PREFIX + testgroupid + USS_BASE_PREFIX
        GRID_PATH = test_path
        log.debug('Setting test path to %s...', GRID_PATH)
        CONNECTION_TIMEOUT = 1.0

    def zookeeper_connection_listener(self, state):
        """Log zookeeper connection state changes.

        LOST and SUSPENDED are logged as errors, CONNECTED as info; any other
        state is ignored.

        Args:
          state: connection state passed in by the kazoo client
        """
        reactions = (
            # Register somewhere that the session was lost
            (KazooState.LOST, log.error,
             'Lost connection with the zookeeper servers...'),
            # Handle being disconnected from Zookeeper
            (KazooState.SUSPENDED, log.error,
             'Suspended connection with the zookeeper servers...'),
            # Handle being connected/reconnected to Zookeeper
            (KazooState.CONNECTED, log.info,
             'Connection restored with the zookeeper servers...'),
        )
        for known_state, emit, text in reactions:
            if state == known_state:
                emit(text)
                break

    def delete_testdata(self, testgroupid=None):
        """Removes the test data from the servers.

        Be careful when using this in parallel as it removes everything under
        the testgroupid, or everything under the test prefix if no
        testgroupid is provided.

        Args:
          testgroupid: ID to use if in test mode, none will remove all test data
        """
        path = (TEST_BASE_PREFIX + testgroupid
                if testgroupid else TEST_BASE_PREFIX)
        self.zk.delete(path, recursive=True)

    def get(self, z, x, y):
        """Gets the metadata and snapshot token for a GridCell.

        Reads data from zookeeper, including a snapshot token. The
        snapshot token is used as a reference when writing to ensure
        the data has not been updated between read and write.

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
        Returns:
          JSend formatted response (https://labs.omniti.com/labs/jsend):
          a success payload, or a fail payload with code 400 (invalid tile),
          404 (no zookeeper metadata) or 424 (content raised ValueError).
        """
        # TODO(hikevin): Change to use our own error codes and let the server
        #                   convert them to http error codes. For now, this is
        #                   at least in a standard JSend format.
        if not slippy_util.validate_slippy(z, x, y):
            return self._format_status_code_to_jsend(400)
        (content, metadata) = self._get_raw(z, x, y)
        if not metadata:
            return self._format_status_code_to_jsend(404)
        try:
            parsed = uss_metadata.USSMetadata(content)
            return {
                'status': 'success',
                'sync_token': metadata.last_modified_transaction_id,
                'data': parsed.to_json()
            }
        except ValueError:
            return self._format_status_code_to_jsend(424)

    def set(self, z, x, y, sync_token, uss_id, ws_scope, operation_format,
            operation_ws, earliest_operation, latest_operation):
        """Sets the metadata for a GridCell.

        Writes data, using the snapshot token for confirming data
        has not been updated since it was last read.

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
          sync_token: token retrieved in the original GET GridCellMetadata,
          uss_id: plain text identifier for the USS,
          ws_scope: scope to use to obtain OAuth token,
          operation_format: output format for operation ws (i.e. NASA, GUTMA),
          operation_ws: submitting USS endpoint where all flights in
            this cell can be retrieved from,
          earliest_operation: lower bound of active or planned flight timestamp,
            used for quick filtering conflicts.
          latest_operation: upper bound of active or planned flight timestamp,
            used for quick filtering conflicts.
        Returns:
          JSend formatted response (https://labs.omniti.com/labs/jsend);
          on success this is the result of a fresh ``get`` of the cell.
        """
        if not slippy_util.validate_slippy(z, x, y):
            return self._format_status_code_to_jsend(400)
        # first we have to get the cell
        (content, metadata) = self._get_raw(z, x, y)
        if not metadata:
            return self._format_status_code_to_jsend(404)
        # Quick check of the token, another is done on the actual set to be
        #    sure but this check fails early and fast
        if str(metadata.last_modified_transaction_id) != str(sync_token):
            return self._format_status_code_to_jsend(409)
        try:
            m = uss_metadata.USSMetadata(content)
            log.debug('Setting metadata for %s...', uss_id)
            upserted = m.upsert_operator(uss_id, ws_scope, operation_format,
                                         operation_ws, earliest_operation,
                                         latest_operation, z, x, y)
            if not upserted:
                log.error('Failed setting operator for %s with token %s...',
                          uss_id, str(sync_token))
                raise ValueError
            status = self._set_raw(z, x, y, m, metadata.version)
        except ValueError:
            status = 424
        if status != 200:
            return self._format_status_code_to_jsend(status)
        # Success, now get the metadata back to send back
        return self.get(z, x, y)

    def delete(self, z, x, y, uss_id):
        """Sets the metadata for a GridCell by removing the entry for the USS.

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
          uss_id: is the plain text identifier for the USS
        Returns:
          JSend formatted response (https://labs.omniti.com/labs/jsend)
        """
        if not slippy_util.validate_slippy(z, x, y):
            return self._format_status_code_to_jsend(400)
        # first we have to get the cell
        (content, metadata) = self._get_raw(z, x, y)
        if not metadata:
            return self._format_status_code_to_jsend(404)
        try:
            m = uss_metadata.USSMetadata(content)
            m.remove_operator(uss_id)
            # TODO(pelletierb): Automatically retry on delete
            status = self._set_raw(z, x, y, m, metadata.version)
        except ValueError:
            status = 424
        if status != 200:
            return self._format_status_code_to_jsend(status)
        # Success: re-read the cell for the new sync token; the data returned
        # is the in-memory object that was just written.
        (content, metadata) = self._get_raw(z, x, y)
        return {
            'status': 'success',
            'sync_token': metadata.last_modified_transaction_id,
            'data': m.to_json()
        }

    def get_multi(self, z, grids):
        """Gets the metadata and snapshot token for multiple GridCells.

        Reads data from zookeeper, including a composite snapshot token. The
        snapshot token is used as a reference when writing to ensure
        the data has not been updated between read and write.

        Args:
          z: zoom level in slippy tile format
          grids: list of (x,y) tiles to retrieve
        Returns:
          JSend formatted response (https://labs.omniti.com/labs/jsend);
          a ValueError from the lookup maps to 400, an IndexError to 404.
        """
        try:
            merged, tokens = self._get_multi_raw(z, grids)
            log.debug('Found sync token %s for %d grids...',
                      self._hash_sync_tokens(tokens), len(tokens))
            return {
                'status': 'success',
                'sync_token': self._hash_sync_tokens(tokens),
                'data': merged.to_json()
            }
        except ValueError as e:
            return self._format_status_code_to_jsend(400, e.message)
        except IndexError as e:
            return self._format_status_code_to_jsend(404, e.message)

    def set_multi(self, z, grids, sync_token, uss_id, ws_scope,
                  operation_format, operation_ws, earliest_operation,
                  latest_operation):
        """Sets multiple GridCells metadata at once.

        Writes data, using the hashed snapshot token for confirming data
        has not been updated since it was last read.

        Args:
          z: zoom level in slippy tile format
          grids: list of (x,y) tiles to update
          sync_token: token retrieved in the original get_multi,
          uss_id: plain text identifier for the USS,
          ws_scope: scope to use to obtain OAuth token,
          operation_format: output format for operation ws (i.e. NASA, GUTMA),
          operation_ws: submitting USS endpoint where all flights in
            this cell can be retrieved from,
          earliest_operation: lower bound of active or planned flight timestamp,
            used for quick filtering conflicts.
          latest_operation: upper bound of active or planned flight timestamp,
            used for quick filtering conflicts.
        Returns:
          JSend formatted response (https://labs.omniti.com/labs/jsend);
          KeyError/RolledBackError map to 409, ValueError to 400 and
          IndexError to 404.
        """
        log.debug('Setting multiple grid metadata for %s...', uss_id)
        try:
            # first, get the affected grid's sync tokens; the combined
            # metadata from this read is not needed
            _, syncs = self._get_multi_raw(z, grids)
            # Quick check of the token, another is done on the actual set to
            #    be sure but this check fails early and fast
            log.debug('Found sync token %d for %d grids...',
                      self._hash_sync_tokens(syncs), len(syncs))
            if str(self._hash_sync_tokens(syncs)) != str(sync_token):
                raise KeyError('Composite sync_token has changed')
            log.debug('Composite sync_token matches, continuing...')
            self._set_multi_raw(z, grids, syncs, uss_id, ws_scope,
                                operation_format, operation_ws,
                                earliest_operation, latest_operation)
            log.debug('Completed updating multiple grids...')
            combined_meta, new_syncs = self._get_multi_raw(z, grids)
            return {
                'status': 'success',
                'sync_token': self._hash_sync_tokens(new_syncs),
                'data': combined_meta.to_json()
            }
        except (KeyError, RolledBackError) as e:
            return self._format_status_code_to_jsend(409, e.message)
        except ValueError as e:
            return self._format_status_code_to_jsend(400, e.message)
        except IndexError as e:
            return self._format_status_code_to_jsend(404, e.message)

    def delete_multi(self, uss_id, z, grids):
        """Sets multiple GridCells metadata by removing the entry for the USS.

        Each cell is read, the operator removed and the cell written back
        one at a time (not in a transaction); the response is then a fresh
        ``get_multi`` of the same grids.

        Args:
          uss_id: is the plain text identifier for the USS
          z: zoom level in slippy tile format
          grids: list of (x,y) tiles to delete
        Returns:
          JSend formatted response (https://labs.omniti.com/labs/jsend)
        """
        log.debug('Deleting multiple grid metadata for %s...', uss_id)
        try:
            if not uss_id:
                raise ValueError('Invalid uss_id for deleting multi')
            for x, y in grids:
                if not slippy_util.validate_slippy(z, x, y):
                    raise ValueError('Invalid slippy grids for lookup')
                (content, metadata) = self._get_raw(z, x, y)
                if not metadata:
                    continue
                m = uss_metadata.USSMetadata(content)
                m.remove_operator(uss_id)
                # TODO(pelletierb): Automatically retry on delete
                # NOTE(review): the write status (e.g. 409 on a version
                # conflict) is discarded here, so a failed write is not
                # reported to the caller.
                self._set_raw(z, x, y, m, metadata.version)
            return self.get_multi(z, grids)
        except ValueError as e:
            return self._format_status_code_to_jsend(400, e.message)

    ######################################################################
    ################       INTERNAL FUNCTIONS    #########################
    ######################################################################
    def _get_raw(self, z, x, y):
        """Gets the raw content and metadata for a GridCell from zookeeper.

        When the node does not exist yet it is created with ``ensure_path``
        and read again.

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
        Returns:
          content: USS metadata
          metadata: straight from zookeeper
        """
        parts = (GRID_PATH, str(z), str(x), str(y), USS_METADATA_FILE)
        path = '%s/%s/%s/%s/%s' % parts
        log.debug('Getting metadata from zookeeper@%s...', path)
        try:
            content, stat = self.zk.get(path)
        except NoNodeError:
            self.zk.ensure_path(path)
            content, stat = self.zk.get(path)
        if content:
            log.debug('Received raw content and metadata from zookeeper: %s',
                      content)
        if stat:
            log.debug('Received raw metadata from zookeeper: %s', stat)
        return content, stat

    def _set_raw(self, z, x, y, m, version):
        """Writes the raw content for a GridCell, guarded by the znode version.

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
          m: metadata object to write
          version: the metadata version verified from the sync_token match
        Returns:
          200 for success, 409 when zookeeper rejects the version
        """
        parts = (GRID_PATH, str(z), str(x), str(y), USS_METADATA_FILE)
        path = '%s/%s/%s/%s/%s' % parts
        try:
            log.debug('Setting metadata to %s...', str(m))
            payload = json.dumps(m.to_json())
            self.zk.set(path, payload, version)
        except BadVersionError:
            log.error('Sync token updated before write for %s...', path)
            return 409
        return 200

    def _get_multi_raw(self, z, grids):
        """Gets the raw content and metadata for multiple GridCells from zookeeper.

        Args:
          z: zoom level in slippy tile format
          grids: list of (x,y) tiles to retrieve
        Returns:
          content: Combined USS metadata
          syncs: list of sync tokens in the same order as the grids
        Raises:
          IndexError: if it cannot find anything in zookeeper
          ValueError: if the grid data is not in the right format
        """
        log.debug('Getting multiple grid metadata for %s...', str(grids))
        # NOTE(review): accumulation starts from None, so the first ``+=``
        # presumably relies on USSMetadata supporting ``None + metadata``
        # (e.g. via __radd__) - confirm in uss_metadata.
        combined_meta = None
        syncs = []
        for x, y in grids:
            if not slippy_util.validate_slippy(z, x, y):
                raise ValueError('Invalid slippy grids for lookup')
            (content, metadata) = self._get_raw(z, x, y)
            if not metadata:
                raise IndexError('Unable to find metadata in platform')
            combined_meta += uss_metadata.USSMetadata(content)
            syncs.append(metadata.last_modified_transaction_id)
        if not syncs:
            raise IndexError('Unable to find metadata in platform')
        return combined_meta, syncs

    def _set_multi_raw(self, z, grids, sync_tokens, uss_id, ws_scope,
                       operation_format, operation_ws, earliest_operation,
                       latest_operation):
        """Updates the raw content for multiple GridCells in one transaction.

        Every cell is first read and updated in memory while its sync token
        is validated; the writes are then committed in a single zookeeper
        transaction guarded by each znode's version.

        Args:
          z: zoom level in slippy tile format
          grids: list of (x,y) tiles to retrieve
          sync_tokens: list of the sync tokens received during get operation
          uss_id: plain text identifier for the USS,
          ws_scope: scope to use to obtain OAuth token,
          operation_format: output format for operation ws (i.e. NASA, GUTMA),
          operation_ws: submitting USS endpoint where all flights in
            this cell can be retrieved from,
          earliest_operation: lower bound of active or planned flight timestamp,
            used for quick filtering conflicts.
          latest_operation: upper bound of active or planned flight timestamp,
            used for quick filtering conflicts.
        Raises:
          KeyError: if a sync token no longer matches or the transaction
            was rolled back
          IndexError: if there is nothing to write or a sync token is missing
          ValueError: if the grid data is not in the right format
        """
        log.debug('Setting multiple grid metadata for %s...', str(grids))
        try:
            contents = []
            # First, get and update them all in memory, validate the sync_token
            for i, grid in enumerate(grids):
                x = grid[0]
                y = grid[1]
                sync_token = sync_tokens[i]
                path = '%s/%s/%s/%s/%s' % (GRID_PATH, str(z), str(x), str(y),
                                           USS_METADATA_FILE)
                (content, metadata) = self._get_raw(z, x, y)
                if str(metadata.last_modified_transaction_id) != str(
                        sync_token):
                    log.error(
                        'Sync token from USS (%s) does not match token from zk (%s)...',
                        str(sync_token),
                        str(metadata.last_modified_transaction_id))
                    raise KeyError('Composite sync_token has changed')
                log.debug('Sync_token matches for %d, %d...', x, y)
                m = uss_metadata.USSMetadata(content)
                if not m.upsert_operator(
                        uss_id, ws_scope, operation_format, operation_ws,
                        earliest_operation, latest_operation, z, x, y):
                    raise ValueError('Failed to set operator content')
                contents.append((path, m, metadata.version))
            if not contents:
                # Nothing to write; previously this surfaced as an IndexError
                # from indexing the empty commit result.
                raise IndexError('Unable to find metadata in platform')
            # Now, start a transaction to update them all
            #  the version will catch any changes and roll back any attempted
            #  updates to the grids
            log.debug('Starting transaction to write all grids at once...')
            t = self.zk.transaction()
            for path, m, version in contents:
                t.set_data(path, json.dumps(m.to_json()), version)
            log.debug('Committing transaction...')
            results = t.commit()
            # A failed transaction reports one exception per operation: the
            # operation that caused the failure carries its own error (e.g.
            # BadVersionError) and only the others carry RolledBackError, so
            # every result has to be inspected, not just the first one.
            if any(isinstance(outcome, Exception) for outcome in results):
                raise KeyError(
                    'Rolled back multi-grid transaction due to grid change')
            log.debug('Committed transaction successfully.')
        except (KeyError, ValueError, IndexError) as e:
            log.error('Error caught in set_multi_raw %s.',
                      getattr(e, 'message', e))
            # Bare raise keeps the original traceback.
            raise

    def _format_status_code_to_jsend(self, status, message=None):
        """Formats a response based on HTTP status code.

    Args:
      status: HTTP status code
      message: optional message to override preset message for codes
    Returns:
      JSend formatted response (https://labs.omniti.com/labs/jsend)
    """

        if status == 200 or status == 204:
            result = {
                'status': 'success',
                'code': 204,
                'message': 'Empty data set.'
            }
        elif status == 400:
            result = {
                'status': 'fail',
                'code': status,
                'message': 'Parameters are not following the correct format.'
            }
        elif status == 404:
            result = {
                'status': 'fail',
                'code': status,
                'message': 'Unable to pull metadata from lock system.'
            }
        elif status == 408:
            result = {
                'status': 'fail',
                'code': status,
                'message': 'Timeout trying to get lock.'
            }
        elif status == 409:
            result = {
                'status':
                'fail',
                'code':
                status,
                'message':
                'Content in metadata has been updated since provided sync token.'
            }
        elif status == 424:
            result = {
                'status':
                'fail',
                'code':
                status,
                'message':
                'Content in metadata is not following JSON format guidelines.'
            }
        else:
            result = {
                'status': 'fail',
                'code': status,
                'message': 'Unknown error code occurred.'
            }
        if message:
            result['message'] = message
        return result

    @staticmethod
    def _hash_sync_tokens(syncs):
        """Hashes a list of sync tokens into a single non-negative int.

        The tokens are sorted first, so the result does not depend on the
        order in which the grids were read.
        """
        ordered = tuple(sorted(syncs))
        log.debug('Hashing syncs: %s', ordered)
        return abs(hash(ordered))
Beispiel #17
0
class ArcusZooKeeper:
    """ZooKeeper helper for Arcus.

    Wraps a KazooClient and manages the ``/arcus`` tree, which consists of
    ``cache_list``, ``client_list`` and ``cache_server_mapping``.
    Progress and transaction results are reported with ``print``; the
    service-code operations print a traceback instead of raising.
    """

    def __init__(self, hostports, timeout):
        """Create (but do not start) the ZooKeeper client.

        Args:
          hostports: ZooKeeper connection string handed to KazooClient.
          timeout: stored on the instance only.
        """
        self.hostports = hostports
        # NOTE(review): timeout is kept but never handed to KazooClient;
        # confirm whether that is intended and which unit callers use.
        self.timeout = timeout
        self.zk = KazooClient(hosts=hostports, read_only=False)

    @staticmethod
    def _encode(obj):
        """Serialize obj to JSON bytes, the value type kazoo expects."""
        return json.dumps(obj).encode("utf-8")

    @staticmethod
    def _has_failure(results):
        """Return True when any transaction result is an exception."""
        return any(isinstance(outcome, Exception) for outcome in results)

    def start(self):
        """Start the ZooKeeper session."""
        self.zk.start()

    def stop(self):
        """Stop the ZooKeeper session."""
        self.zk.stop()

    def init_structure(self):
        """Create the empty ``/arcus`` tree in one transaction.

        Returns:
          True on success; False when ``/arcus`` already exists or when any
          operation of the transaction failed.
        """
        if self.zk.exists("/arcus"):
            print("init_arcus_structure: fail (/arcus exists)")
            return False

        tx = self.zk.transaction()
        tx.create("/arcus", b"")
        tx.create("/arcus/cache_list", b"")
        tx.create("/arcus/client_list", b"")
        tx.create("/arcus/cache_server_mapping", b"")
        results = tx.commit()
        # A successful commit returns one result per operation (the created
        # paths), so a non-empty list is not an error by itself; failures
        # show up as exception instances.
        if self._has_failure(results):
            print(results)
            return False

        print("init_structure: success")
        return True

    def drop_structure(self):
        """Recursively delete the whole ``/arcus`` tree."""
        self.zk.delete("/arcus", recursive=True)
        print("delete_structure: success")

    def get_structure(self):
        """Return the names of the direct children of ``/arcus``."""
        return self.zk.get_children("/arcus")

    def get_mapping_for_service(self, service_code):
        """Return the mapping node paths that belong to a service code.

        A ``/arcus/cache_server_mapping/<ip:port>`` node belongs to the
        service when its first child is named ``service_code``.
        """
        mapping = "/arcus/cache_server_mapping"
        result = []
        for ipport in self.zk.get_children(mapping):
            node = "%s/%s" % (mapping, ipport)
            codes = self.zk.get_children(node)
            if codes and codes[0] == service_code:
                result.append(node)
        return result

    def get_config_for_service(self, service_code):
        """Return ``(parsed_json, raw_data, stat)`` of a cache_list node."""
        cache_list = "/arcus/cache_list/%s" % service_code
        data, stat = self.zk.get(cache_list)
        return json.loads(data), data, stat

    def update_service_code(self, cluster):
        """Create or update a service code and rebuild its server mapping.

        ``cluster`` is mutated: ``created``/``modified`` timestamps are set
        before it is stored as JSON in the cache_list node. All changes are
        committed in one transaction whose results are printed. Errors are
        not raised; a traceback is printed instead.

        Args:
          cluster: dict with ``serviceCode`` and ``servers`` (each with an
            ``ip`` and an optional ``config``) and an optional cluster-wide
            ``config``; the merged config must contain ``port``.
        """
        service_code = cluster["serviceCode"]
        cache_list = "/arcus/cache_list/%s" % service_code
        client_list = "/arcus/client_list/%s" % service_code
        mapping = "/arcus/cache_server_mapping"

        try:
            delete_list = self.get_mapping_for_service(service_code)

            # 0. Create a transaction
            tx = self.zk.transaction()

            # 1. Cache list
            if self.zk.exists(cache_list):
                previous, _, _ = self.get_config_for_service(service_code)
                cluster["created"] = previous.get("created")
                cluster["modified"] = str(datetime.datetime.now())
                tx.set_data(cache_list, self._encode(cluster))
            else:
                cluster["created"] = str(datetime.datetime.now())
                tx.create(cache_list, self._encode(cluster))

            # 2. Client list
            if not self.zk.exists(client_list):
                tx.create(client_list, b"")

            # 3. Mapping: drop the old nodes, then add the current servers
            for each in delete_list:
                tx.delete("%s/%s" % (each, service_code))
                tx.delete(each)

            global_config = cluster.get("config", {})
            for server in cluster["servers"]:
                # Per-server settings override the cluster-wide ones.
                config = dict(global_config)
                config.update(server.get("config", {}))

                if not config:
                    print("update_service_code: config not found for {0}".format(server))
                    continue

                map_ip = "%s/%s:%s" % (mapping, server["ip"], config["port"])
                map_code = "%s/%s" % (map_ip, service_code)

                tx.create(map_ip, self._encode(config))
                tx.create(map_code, b"")

            # 4. Commit
            results = tx.commit()
            print(results)
        except Exception:
            # Deliberate best effort: report and carry on.
            traceback.print_exc()

    def delete_service_code(self, cluster):
        """Delete a service code's cache_list, client_list and mapping nodes.

        Everything is removed in one transaction whose results are printed.
        Errors are not raised; a traceback is printed instead.
        """
        service_code = cluster["serviceCode"]
        cache_list = "/arcus/cache_list/%s" % service_code
        client_list = "/arcus/client_list/%s" % service_code

        try:
            delete_list = self.get_mapping_for_service(service_code)

            # 0. Create a transaction
            tx = self.zk.transaction()

            # 1. Cache list
            tx.delete(cache_list)

            # 2. Client list
            tx.delete(client_list)

            # 3. Mapping
            for each in delete_list:
                tx.delete("%s/%s" % (each, service_code))
                tx.delete(each)

            # 4. Commit
            results = tx.commit()
            print(results)
        except Exception:
            # Deliberate best effort: report and carry on.
            traceback.print_exc()

    def list_all_service_code(self):
        """Return ``list_service_code`` for every child of the cache_list.

        Returns None (after printing a traceback) when listing fails.
        """
        cache_list = "/arcus/cache_list"

        try:
            return [self.list_service_code(code)
                    for code in self.zk.get_children(cache_list)]
        except Exception:
            traceback.print_exc()

    def list_service_code(self, service_code):
        """Describe one service code: stored config plus server status.

        Servers from the mapping are ``online`` when a child of the
        cache_list node starts with their ``ip:port`` (the text before the
        first ``-``), otherwise ``offline``; cache_list children without a
        mapping entry are ``undefined``. The three lists are sorted.

        Returns:
          dict with serviceCode, config, online, offline, undefined, created
          and modified; None (after printing a traceback) on any error.
        """
        cache_list = "/arcus/cache_list/%s" % service_code

        try:
            data, stat = self.zk.get(cache_list)
            static_list = self.get_mapping_for_service(service_code)
            current_list = self.zk.get_children(cache_list)

            # get clusterConfig
            cluster = json.loads(data)

            # get clusterStatus
            static_set = set(each.split("/")[-1] for each in static_list)
            current_set = set(each.split("-")[0] for each in current_list)
            offline = static_set - current_set
            online = static_set - offline
            undefined = current_set - static_set

            return {
                "serviceCode": service_code,
                "config": cluster,
                "online": sorted(online),
                "offline": sorted(offline),
                "undefined": sorted(undefined),
                "created": cluster.get("created"),
                "modified": cluster.get("modified"),
            }
        except Exception:
            traceback.print_exc()
Beispiel #18
0
class Applier:
    """Promotes the "next" target to "current" once it is fully populated.

    A target is considered ready when ZK_NEXT_TARGET names an existing
    ``/phrases/distributor/<id>`` node, that node has at least one
    partition, and every partition has at least
    NUMBER_NODES_PER_PARTITION nodes that all carry non-empty data.
    """

    def __init__(self):
        """Create the ZooKeeper client and a stream logger.

        The host comes from ZOOKEEPER_HOST and the log level from LOG_LEVEL
        (default INFO).
        """
        self._zk = KazooClient(hosts=f'{os.getenv("ZOOKEEPER_HOST")}:2181')
        self._logger = logging.getLogger(__name__)
        level = logging.getLevelName(os.getenv("LOG_LEVEL", "INFO"))
        self._logger.setLevel(level)
        ch = logging.StreamHandler()
        ch.setLevel(level)
        self._logger.addHandler(ch)

    def start(self):
        """Connect, try to apply once, then retry every minute (blocking)."""
        self._logger.debug("Applier started")
        self._zk.start()
        self._attempt_to_apply_next_target()

        scheduler = BlockingScheduler(timezone="UTC")
        scheduler.add_job(self._attempt_to_apply_next_target,
                          'interval',
                          minutes=1)
        scheduler.start()

    def stop(self):
        """Stop the ZooKeeper client."""
        self._zk.stop()

    def _attempt_to_apply_next_target(self):
        """Apply the next target if, and only if, it is ready."""
        if self._is_next_target_ready():
            self._apply_next_target()

    def _apply_next_target(self):
        """Move the next target id into the current target node.

        Both writes happen in one transaction: ZK_NEXT_TARGET is cleared and
        ZK_CURRENT_TARGET receives the id.

        Returns:
          True when the transaction committed, False when any operation of
          it failed (the failure is logged).
        """
        self._logger.info("Applying next target")
        self._zk.ensure_path(ZK_CURRENT_TARGET)
        next_target_id = self._zk.get(ZK_NEXT_TARGET)[0]

        tx = self._zk.transaction()
        tx.set_data(ZK_NEXT_TARGET, b'')
        tx.set_data(ZK_CURRENT_TARGET, next_target_id)
        results = tx.commit()
        # A failed transaction reports exception instances as results;
        # without this check a failed apply would pass silently.
        failures = [r for r in results if isinstance(r, Exception)]
        if failures:
            self._logger.error("Applying next target failed: %s", failures)
            return False
        return True

    def _is_next_target_ready(self):
        """Return True when the next target is complete enough to apply.

        Missing znodes anywhere along the way mean "not ready" rather than
        an error, since the target may still be under construction.
        """
        if self._zk.exists(ZK_NEXT_TARGET) is None:
            return False

        next_target_id = self._zk.get(ZK_NEXT_TARGET)[0].decode()
        target_path = f'/phrases/distributor/{next_target_id}'
        if not next_target_id or self._zk.exists(target_path) is None:
            return False

        partitions_path = f'{target_path}/partitions'
        if self._zk.exists(partitions_path) is None:
            return False
        partitions = self._zk.get_children(partitions_path)
        if not partitions:
            return False

        for partition in partitions:
            nodes_path = f'{partitions_path}/{partition}/nodes'
            if self._zk.exists(nodes_path) is None:
                return False
            nodes = self._zk.get_children(nodes_path)

            if len(nodes) < NUMBER_NODES_PER_PARTITION:
                return False

            for node in nodes:
                hostname = self._zk.get(f'{nodes_path}/{node}')[0].decode()
                if not hostname:
                    return False

        return True
Beispiel #19
0
class Node(object):
    """One participant in a resource pool coordinated through ZooKeeper.

    A node registers itself under ``<pool.path>/nodes`` and claims a resource
    by creating an ephemeral znode at ``<pool.path>/leaders/<resource>`` whose
    data is the node's id. Set ``on_acquire_resource`` /
    ``on_release_resource`` to callables taking ``(node, resource)`` to be
    notified of ownership changes.
    """

    def __init__(self, pool, metadata=None, max_inflight_acquires=1, auto_acquire=True):
        """Connect to ZooKeeper and start the periodic balancing greenlet.

        :param pool: pool object; this class reads its ``hosts``, ``path``,
            ``resources`` and ``nodes`` attributes.
        :param metadata: optional mapping kept on the node (``{}`` if falsy).
        :param max_inflight_acquires: size of the semaphore guarding takeovers.
        :param auto_acquire: when false, ``_check_for_takeover`` is a no-op.
        :raises Exception: if the client is not connected after the 5 second wait.
        """
        self.pool = pool
        self.zk = KazooClient(pool.hosts, timeout=5, handler=SequentialGeventHandler())
        event = self.zk.start_async()
        event.wait(timeout=5)
        if not self.zk.connected:
            # Stop the client so it does not keep retrying in the background.
            self.zk.stop()
            raise Exception('Failed to reach zookeeper')

        self.metadata = metadata or {}

        self.id = None
        self.path = None
        self.auto_acquire = auto_acquire
        # 0 means "no cap" (see the check at the top of _try_takeover).
        self.max_resources = 0

        # Set of resources we own
        self.resources = set()
        # resource name -> time.time() of the last release; consulted by
        # _try_takeover to avoid immediately re-grabbing a resource.
        self._resource_backoff = {}
        self._resources_acquiring = gevent.lock.Semaphore(max_inflight_acquires)

        # Callbacks
        self.on_acquire_resource = None
        self.on_release_resource = None

        self._anti_entropy_greenlet = gevent.spawn(self._anti_entropy)

    def disconnect(self):
        """Disconnect the underlying ZooKeeper client."""
        self.zk.disconnect()

    def acquire(self, resource):
        """Forcefully take ownership of ``resource``.

        :returns: ``True`` on success, ``False`` otherwise.
        """
        assert resource in self.pool.resources
        return self._try_takeover(resource, force=True)

    def release(self, resource):
        """Give up ``resource`` by deleting its leader znode.

        ``self.resources`` is not modified here; it is updated by
        ``_on_resource_leader_change`` when the watch reports the deletion.
        """
        assert resource in self.resources

        # TODO: transaction here
        self.zk.delete(os.path.join(self.pool.path, 'leaders', resource))

    def leave(self):
        """Release every resource this node currently owns."""
        # Iterate over a copy: the watch callback mutates self.resources.
        for resource in list(self.resources):
            self.release(resource)

    def join(self):
        """Register this node in the pool and start looking for resources."""
        path = self.zk.create(os.path.join(self.pool.path, 'nodes', ''), ephemeral=True, sequence=True)
        self.path = path
        # The id is the last path component of the sequential znode.
        self.id = path.rsplit('/', 1)[-1]

        # Watch for leadership changes so we can possibly take over
        ChildrenWatch(self.zk, os.path.join(self.pool.path, 'leaders'), self._on_leaders_change)

        # Now that we've joined, lets see if there are any dangling resources we
        #  can take ownership of
        gevent.spawn(self._check_for_takeover, delay=0)

    def _on_leaders_change(self, data):
        """Children-watch callback: schedule a delayed takeover check."""
        # TODO: debounce this instead of just sleeping
        gevent.spawn(self._check_for_takeover, delay=5)

    def _on_resource_leader_change(self, data, stat, event):
        """Data-watch callback for a leader znode this node created.

        Drops the resource locally when its znode was deleted or now carries
        another id, and tries to re-acquire on deletion when it was not ours.

        :returns: ``False`` after dropping a resource (kazoo watchers treat a
            ``False`` return as "stop watching"); otherwise ``None``.
        """
        if not event:
            return

        resource_name = event.path.split('/')[-1]
        if resource_name not in self.pool.resources:
            return

        if resource_name in self.resources:
            if event.type == 'DELETED' or data != self.id:
                # Remember when we lost it so _try_takeover backs off.
                self._resource_backoff[resource_name] = time.time()
                self.resources.remove(resource_name)
                if callable(self.on_release_resource):
                    self.on_release_resource(self, resource_name)
                return False

        if event.type == 'DELETED':
            self._try_takeover(resource_name)

    def _check_for_takeover(self, delay=5):
        """After ``delay`` seconds, try to claim every leaderless resource."""
        if not self.auto_acquire:
            return
        # Runs inside a greenlet: gevent.sleep yields to the hub, whereas a
        # plain time.sleep would block every greenlet unless monkey-patched.
        gevent.sleep(delay)

        resources_with_leaders = set(self.zk.get_children(os.path.join(self.pool.path, 'leaders')))
        resources_without_leaders = self.pool.resources - resources_with_leaders

        for resource in resources_without_leaders:
            self._try_takeover(resource)

            # If we have more than the even-split number of resources, backoff a bit
            if len(self.resources) > len(self.pool.resources) / len(self.pool.nodes):
                gevent.sleep(1)

    def _try_takeover(self, resource, force=False):
        """Attempt to become leader of ``resource``.

        :param resource: resource name (last component of the leader path).
        :param force: when true, ignore the backoff window and replace an
            existing leader znode instead of giving up.
        :returns: ``True`` if this node now owns the resource, else ``False``.
        """
        if self.max_resources and len(self.resources) >= self.max_resources:
            return False

        if not force and resource in self._resource_backoff:
            # Within 10 seconds of having released it: leave it for others.
            if time.time() - self._resource_backoff[resource] < 10:
                return False
            del self._resource_backoff[resource]

        # Do not queue behind in-flight acquisitions; just skip this round.
        if self._resources_acquiring.locked():
            return False

        with self._resources_acquiring:
            path = os.path.join(self.pool.path, 'leaders', resource)

            try:
                self.zk.create(path, unicode.encode(self.id), ephemeral=True)
            except NodeExistsError:
                if not force:
                    return False

                # Swap the existing leader for ourselves in one transaction,
                # guarded by the version we just read.
                _, metadata = self.zk.get(path)
                transaction = self.zk.transaction()
                transaction.delete(path, version=metadata.version)
                transaction.create(path, unicode.encode(self.id), ephemeral=True)
                result = transaction.commit()
                if result[0] is not True or result[1] != path:
                    return False

            DataWatch(self.zk, path, self._on_resource_leader_change)
            self.resources.add(resource)
            if callable(self.on_acquire_resource):
                self.on_acquire_resource(self, resource)
            return True

    def balance(self):
        """Release one random resource when holding well above the fair share."""
        threshold = math.ceil(len(self.pool.resources) / (len(self.pool.nodes) * 1.0))
        our_value = len(self.resources)

        if our_value > threshold + 1:
            resource = random.choice(list(self.resources))
            # Start the backoff window now so we do not instantly re-take it.
            self._resource_backoff[resource] = time.time()
            self.release(resource)

    def _anti_entropy(self):
        """Greenlet body: rebalance every 10 seconds, forever."""
        while True:
            # Cooperative sleep so the other greenlets keep running.
            gevent.sleep(10)
            self.balance()
Beispiel #20
0
class ZookClient(object):
    """Writes the DSO account tree and its lookup mappings into ZooKeeper.

    All nodes live under ``/dso/``. Apart from ``create_accounts_path``
    (which stores ``account.to_str()``), node payloads are UTF-8 JSON.
    """

    def __init__(self, zookeeper_host=None):
        """Start a Kazoo client and define the path-segment constants.

        :param zookeeper_host: connection string handed to ``KazooClient``;
            ``None`` selects ``"localhost:2181"``.
        """
        logging.basicConfig()
        # Create a client and start it
        if zookeeper_host is None:
            zookeeper_host = "localhost:2181"
        self.zk = KazooClient(zookeeper_host)
        self.zk.start()
        self.tran = self.zk.transaction()
        self.CONST_BASE_PATH = "/dso/"
        self.CONST_ACCOUNTS_PATH = "accounts/"
        self.CONST_GROUPS_PATH = "groups/"
        self.CONST_USERS_PATH = "users/"
        self.CONST_HOSTS_PATH = "hosts/"
        self.CONST_SERVERS_PATH = "servers/"
        self.CONST_SERVICES_PATH = "services/"
        self.CONST_INSTANCES_PATH = "instances/"

        self.CONST_MAPPING_PATH = "Mapping/"
        self.CONST_IP2USER_PATH = "Ip2User/"
        self.CONST_MAC2USER_PATH = "Mac2User/"
        self.CONST_IP2VMPATH = "Ip2Vm/"
        self.CONST_USER2ACCOUNT_PATH = "User2Account/"
        self.CONST_AID2ANAME_PATH = "Aid2Aname/"
        self.CONST_VM_INFO_PATH = "VmInfo/"

        # service name -> value stored as "user" on each instance node
        self.CONST_HOST_MAPPING = dict(vrouter='fedora',
                                       dns='ubuntu',
                                       firewall='ubuntu',
                                       ipsecvpn='centos',
                                       vpc='ubuntu')

    def _put_json(self, path, payload):
        """Make sure ``path`` exists, then store ``payload`` there as UTF-8 JSON."""
        self.zk.ensure_path(path)
        self.zk.set(path, json.dumps(payload).encode('utf8'))

    def create_accounts_path(self, accounts, **kwargs):
        """Create one node per account, holding ``account.to_str()``.

        :param accounts: iterable of objects exposing ``id`` and ``to_str()``.
        :param kwargs: accepted but not used.
        """
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        self.zk.ensure_path(accounts_path)

        for account in accounts:
            path = accounts_path + account.id
            account_val = account.to_str()
            self.zk.ensure_path(path)
            self.zk.set(path, b"" + account_val.encode('utf8'))

    def create_account_path(self, account_info):
        """
            Store one account's id and name at its node under the accounts root.
            :param account_info: object exposing ``id`` and ``account_name``
        """
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        if not self.zk.exists(accounts_path):
            self.zk.ensure_path(accounts_path)

        account_data = dict(id=account_info.id, name=account_info.account_name)
        self._put_json(accounts_path + account_info.id, account_data)

    def create_accountinfo_path(self, account_info):
        """Write an account's groups and services beneath its account node.

        :param account_info: object exposing ``id``, ``groups`` and ``services``.
        """
        account_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH + account_info.id + "/"
        if not self.zk.exists(account_path):
            # Only reported; the ensure_path calls below create the node anyway.
            print("Path" + account_path + " does not exist.")

        # groups
        groups_path = account_path + self.CONST_GROUPS_PATH
        self.zk.ensure_path(groups_path)
        for group_item in account_info.groups:
            self.create_usergroup_path(groups_path, group_item)

        # services: summary on the services node, then one node per service
        services_path = account_path + self.CONST_SERVICES_PATH
        self.zk.ensure_path(services_path)
        self.gen_services_summay(services_path, account_info.services)
        for service_item in account_info.services:
            self.create_service_path(services_path, service_item)

    def create_service_path(self, parent_path, service):
        """Write a service node and a child node for each of its instances.

        Does nothing when ``service`` or its ``servicename`` is ``None``.
        """
        if service is None or service.servicename is None:
            return
        service_path = parent_path + service.servicename + "/"
        service_data = dict(id=service.id, servicename=service.servicename)
        self._put_json(service_path, service_data)

        instances_path = service_path + self.CONST_INSTANCES_PATH
        self.zk.ensure_path(instances_path)
        for instance_item in service.instances:
            self.create_instance_path(instances_path, instance_item,
                                      service.servicename)

    def create_instance_path(self, parent_path, instance, service_name):
        """Write one instance node.

        :param service_name: key into ``CONST_HOST_MAPPING``; an unknown name
            raises ``KeyError``.
        :returns: the instance node path, or ``None`` when ``instance.id`` is
            ``None``.
        """
        if instance.id is None:
            print("instance.id is None")
            return
        instance_path = parent_path + instance.id

        instance_data = dict(id=instance.id,
                             mac=instance.mac,
                             manageip=instance.manageip,
                             publicip=instance.publicip,
                             publicgateway=instance.publicgateway,
                             publicnetmask=instance.publicnetmask,
                             serviceip=instance.serviceip,
                             status=instance.status,
                             user=self.CONST_HOST_MAPPING[service_name])

        self._put_json(instance_path, instance_data)
        return instance_path

    def create_usergroup_path(self, parent_path, group):
        """Write a group node plus its ``users/`` and ``servers/`` subtrees.

        Does nothing when ``group`` or its ``groupname`` is ``None``.
        """
        if group is None or group.groupname is None:
            return
        group_path = parent_path + group.groupname + "/"
        group_data = dict(id=group.id,
                          groupname=group.groupname,
                          description=group.description)
        self._put_json(group_path, group_data)

        users_path = group_path + self.CONST_USERS_PATH
        self.zk.ensure_path(users_path)
        for user_item in group.users:
            self.create_user_path(users_path, user_item)

        servers_path = group_path + self.CONST_SERVERS_PATH
        self.zk.ensure_path(servers_path)
        for server in group.servers:
            self.create_host_path(servers_path, server)

    def create_user_path(self, parent_path, user):
        """Write a user node and a ``hosts/`` child node per host of the user.

        Prints a notice and returns when ``user`` or ``user.name`` is ``None``.
        """
        if user is None or user.name is None:
            print("user is None")
            return
        user_path = parent_path + user.name + "/"
        user_data = dict(id=user.id, name=user.name, email=user.email)
        self._put_json(user_path, user_data)

        hosts_path = user_path + self.CONST_HOSTS_PATH
        self.zk.ensure_path(hosts_path)
        for host_item in user.hosts:
            self.create_host_path(hosts_path, host_item)

    def create_host_path(self, parent_path, host):
        """Write one host node, named after the host's MAC address.

        :returns: the host node path, or ``None`` when ``host`` or its ``mac``
            is ``None``.
        """
        if host is None or host.mac is None:
            return

        host_path = parent_path + host.mac
        host_data = dict(ip=host.ip,
                         mac=host.mac,
                         host_name=host.host_name,
                         domain=host.domain)

        self._put_json(host_path, host_data)
        return host_path

    def delete_dso_path(self):
        """Recursively delete ``/dso`` and recreate it empty."""
        self.zk.delete("/dso", recursive=True)
        self.zk.ensure_path("/dso")

    def delete_account_path(self, account_id):
        """Recursively delete the node of the account ``account_id``."""
        account_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH + account_id
        self.zk.delete(account_path, recursive=True)

    def delete_account_mapping_path(self, account_id):
        """Recursively delete the account's Ip2Vm, Ip2User and Mac2User nodes."""
        ip2vm_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH + self.CONST_IP2VMPATH + account_id
        ip2user_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH + self.CONST_IP2USER_PATH + account_id
        mac2user_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH + self.CONST_MAC2USER_PATH + account_id

        self.zk.delete(ip2vm_path, recursive=True)
        self.zk.delete(ip2user_path, recursive=True)
        self.zk.delete(mac2user_path, recursive=True)

    @staticmethod
    def check_none(data):
        """Return the string ``"None"`` for ``None``; otherwise ``data`` unchanged."""
        return "None" if data is None else data

    def create_user2account_path(self, user2account):
        """Store the ``user2account`` mapping as JSON on the User2Account node."""
        path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH + self.CONST_USER2ACCOUNT_PATH
        self._put_json(path, user2account)

    def create_aid2aname_path(self, aid2aname):
        """Store the account-id -> account-name mapping on the Aid2Aname node."""
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        if not self.zk.exists(mapping_root):
            self.zk.ensure_path(mapping_root)
        self._put_json(mapping_root + self.CONST_AID2ANAME_PATH, aid2aname)

    def create_ip2user_path(self, account_info, ip2user):
        """Write ``Ip2User/<account id>/<ip>`` nodes for one account.

        :param account_info: object exposing ``id`` and ``account_name``.
        :param ip2user: mapping of IP string -> JSON-serialisable user data;
            a ``None`` key is reported and skipped.
        """
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        if not self.zk.exists(mapping_root):
            self.zk.ensure_path(mapping_root)

        ip2user_path = mapping_root + self.CONST_IP2USER_PATH
        self.zk.ensure_path(ip2user_path)

        account_path = ip2user_path + account_info.id + "/"
        self._put_json(account_path, {"account_name": account_info.account_name})

        for ip in ip2user:
            if ip is None:
                print("Ip is None")
                continue
            self._put_json(account_path + ip, ip2user[ip])

    def create_mac2user_path(self, account_info, mac2user):
        """Write ``Mac2User/<account id>/<mac>`` nodes for one account.

        :param mac2user: mapping of MAC string -> JSON-serialisable user data;
            a ``None`` key is reported and skipped.
        """
        mac2user_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH + self.CONST_MAC2USER_PATH
        self.zk.ensure_path(mac2user_path)

        account_path = mac2user_path + account_info.id + "/"
        self._put_json(account_path, {"account_name": account_info.account_name})

        for mac in mac2user:
            if mac is None:
                print("mac is None")
                continue
            self._put_json(account_path + mac, mac2user[mac])

    def gen_mapping_pre_account(self, account_info, vpn_clients):
        """Build and write the Ip2User, Mac2User and Ip2Vm mappings of an account.

        Hosts without an IP contribute to neither the IP nor the MAC mapping.
        VPN clients are added to the IP mapping with any ``/suffix`` removed
        from their address.
        """
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        self.zk.ensure_path(mapping_root)
        self.zk.ensure_path(mapping_root + self.CONST_IP2VMPATH)
        self.zk.ensure_path(mapping_root + self.CONST_MAC2USER_PATH)

        user2account = {}
        ip2user = {}
        mac2user = {}
        for group in account_info.groups:
            for user in group.users:
                user2account[user.id] = account_info.id
                for host in user.hosts:
                    if host.ip is None:
                        continue
                    user_data = dict(userid=user.id,
                                     username=user.name,
                                     useremail=user.email,
                                     groupname=group.groupname)
                    ip2user[host.ip] = user_data
                    mac2user[host.mac] = user_data

        # add vpn info
        for vpn in vpn_clients:
            if vpn.ip is None:
                continue
            vpn_ip = vpn.ip
            if "/" in vpn_ip:
                vpn_ip = vpn_ip.split("/")[0]
            ip2user[vpn_ip] = dict(userid='vpn_user',
                                   username=vpn.user_name,
                                   useremail='',
                                   groupname=vpn.group)

        self.create_ip2user_path(account_info, ip2user)
        self.create_mac2user_path(account_info, mac2user)

        # not used now
        # self.create_user2account_path(user2account)

        self.create_ip2vm_path(account_info)

    def gen_vm_path(self, vm_info):
        """Write one ``VmInfo/<manage ip>`` node per VM (``/`` replaced by ``-``)."""
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        if not self.zk.exists(mapping_root):
            self.zk.ensure_path(mapping_root)
        vms_path = mapping_root + self.CONST_VM_INFO_PATH
        self.zk.ensure_path(vms_path)
        for vm in vm_info:
            vm_node_path = vms_path + vm.manage_ip.replace('/', '-')
            vm_dict = dict(id=vm.id, type=vm.type, status=vm.status)
            self._put_json(vm_node_path, vm_dict)

    def create_mapping_accounts(self, accounts):
        """Write the account-id -> account-name mapping for ``accounts``."""
        aid2aname = {}
        for account in accounts:
            aid2aname[account.id] = account.account_name
        self.create_aid2aname_path(aid2aname)

    def gen_services_summay(self, path, services):
        """Store a per-service HTML summary (one table per instance) at ``path``.

        The node at ``path`` must already exist; this method only sets its
        data. Values are concatenated as-is, so they must be strings or
        ``None`` (rendered as ``"None"``).
        """
        services_data = {}
        for service_item in services:
            tables = []
            for instance in service_item.instances:
                fields = (
                    ("manageip", instance.manageip),
                    ("publicip", instance.publicip),
                    ("publicgateway", instance.publicgateway),
                    ("publicnetmask", instance.publicnetmask),
                    ("serviceip", instance.serviceip),
                    ("status", instance.status),
                    ("user", self.CONST_HOST_MAPPING[service_item.servicename]),
                )
                rows = "".join(
                    "<tr><td>" + label + "</td><td>" + self.check_none(value) + "</td></tr>"
                    for label, value in fields)
                tables.append("<table class=\"table\">" + rows + "</table>" + "\n")
            services_data[service_item.servicename] = "".join(tables)

        self.zk.set(path, json.dumps(services_data).encode('utf8'))

    def create_ip2vm_path(self, account_info):
        """Create ``Ip2Vm/<account id>/<manage ip>`` nodes for every instance.

        Uses ``create`` rather than ``ensure_path`` + ``set``, so an already
        existing node makes the ZooKeeper client raise. Instances without a
        management IP are skipped.
        """
        base_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH + self.CONST_IP2VMPATH
        path = base_path + account_info.id + "/"

        account_data = dict(accountname=account_info.account_name)
        self.zk.create(path, json.dumps(account_data).encode('utf8'))

        self.zk.ensure_path(path)
        for service_item in account_info.services:
            service_name = service_item.servicename
            for instance in service_item.instances:
                if instance.manageip is None:
                    continue
                # The node is named after the address with any "/suffix" cut off.
                manageip = instance.manageip
                if '/' in manageip:
                    manageip = manageip.split('/', 1)[0]

                vm_path = path + manageip
                host_name = account_info.id + '-' + service_name

                instance_data = dict(id=instance.id,
                                     mac=instance.mac,
                                     manageip=instance.manageip,
                                     publicip=instance.publicip,
                                     publicgateway=instance.publicgateway,
                                     publicnetmask=instance.publicnetmask,
                                     serviceip=instance.serviceip,
                                     status=instance.status,
                                     servicename=service_name,
                                     hostname=host_name)
                self.zk.create(vm_path,
                               json.dumps(instance_data).encode('utf8'))

    def get_all_account(self):
        """Return the child names of the accounts root node."""
        accounts_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        return self.zk.get_children(accounts_path)

    def commit(self):
        """Commit the transaction object created in ``__init__``."""
        self.tran.commit()

    def stopZooK(self):
        """Stop the ZooKeeper client."""
        self.zk.stop()
Beispiel #21
0
class ZookClient(object):
    def __init__(self, zookeeper_host=None):
        """Start a Kazoo client and define the path-segment constants.

        :param zookeeper_host: connection string handed to ``KazooClient``;
            ``None`` selects ``"localhost:2181"``.
        """
        logging.basicConfig()

        host = "localhost:2181" if zookeeper_host is None else zookeeper_host
        self.zk = KazooClient(host)
        self.zk.start()
        self.tran = self.zk.transaction()

        # Root of the tree and the segments of the account hierarchy.
        self.CONST_BASE_PATH = "/dso/"
        self.CONST_ACCOUNTS_PATH = "accounts/"
        self.CONST_GROUPS_PATH = "groups/"
        self.CONST_USERS_PATH = "users/"
        self.CONST_HOSTS_PATH = "hosts/"
        self.CONST_SERVERS_PATH = "servers/"
        self.CONST_SERVICES_PATH = "services/"
        self.CONST_INSTANCES_PATH = "instances/"

        # Segments of the lookup-mapping subtree.
        self.CONST_MAPPING_PATH = "Mapping/"
        self.CONST_IP2USER_PATH = "Ip2User/"
        self.CONST_MAC2USER_PATH = "Mac2User/"
        self.CONST_IP2VMPATH = "Ip2Vm/"
        self.CONST_USER2ACCOUNT_PATH = "User2Account/"
        self.CONST_AID2ANAME_PATH = "Aid2Aname/"
        self.CONST_VM_INFO_PATH = "VmInfo/"

        # service name -> value stored as "user" on each instance node
        self.CONST_HOST_MAPPING = {
            "vrouter": "fedora",
            "dns": "ubuntu",
            "firewall": "ubuntu",
            "ipsecvpn": "centos",
            "vpc": "ubuntu",
        }

    def create_accounts_path(self, accounts, **kwargs):
        """Create one node per account, holding ``account.to_str()``.

        :param accounts: iterable of objects exposing ``id`` and ``to_str()``.
        :param kwargs: accepted but not used.
        """
        root = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        self.zk.ensure_path(root)

        for entry in accounts:
            node = root + entry.id
            text = entry.to_str()
            self.zk.ensure_path(node)
            self.zk.set(node, b"" + text.encode("utf8"))

    def create_account_path(self, account_info):
        """
            Store one account's id and name at its node under the accounts root.
            :param account_info: object exposing ``id`` and ``account_name``
        """
        root = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        root_missing = not self.zk.exists(root)
        if root_missing:
            self.zk.ensure_path(root)

        node = root + account_info.id
        payload = {"id": account_info.id, "name": account_info.account_name}
        self.zk.ensure_path(node)
        self.zk.set(node, json.dumps(payload).encode("utf8"))

    def create_accountinfo_path(self, account_info):
        """Write an account's groups and services beneath its account node.

        :param account_info: object exposing ``id``, ``groups`` and ``services``.
        """
        account_path = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH + account_info.id + "/"
        if not self.zk.exists(account_path):
            # Only reported; the ensure_path calls below create the node anyway.
            # Parenthesised single-argument print works on Python 2 and 3.
            print("Path" + account_path + " does not exist.")

        # groups
        groups_path = account_path + self.CONST_GROUPS_PATH
        self.zk.ensure_path(groups_path)
        for group_item in account_info.groups:
            self.create_usergroup_path(groups_path, group_item)

        # services: summary on the services node, then one node per service
        services_path = account_path + self.CONST_SERVICES_PATH
        self.zk.ensure_path(services_path)
        self.gen_services_summay(services_path, account_info.services)
        for service_item in account_info.services:
            self.create_service_path(services_path, service_item)

    def create_service_path(self, parent_path, service):
        """Write a service node and a child node for each of its instances.

        Does nothing when ``service`` or its ``servicename`` is ``None``.
        """
        if service is None or service.servicename is None:
            return

        node = parent_path + service.servicename + "/"
        payload = {"id": service.id, "servicename": service.servicename}
        self.zk.ensure_path(node)
        self.zk.set(node, json.dumps(payload).encode("utf8"))

        # Instances live under "<service>/instances/".
        instances_root = node + self.CONST_INSTANCES_PATH
        self.zk.ensure_path(instances_root)
        for member in service.instances:
            self.create_instance_path(instances_root, member, service.servicename)

    def create_instance_path(self, parent_path, instance, service_name):

        if instance.id is None:
            print "instance.id is None"
            return
        instance_path = parent_path + instance.id
        # instance_data = "id: " + instance.id + \
        #                 "\nmac: " + self.check_none(instance.mac) + \
        #                 "\nmanageip: " + self.check_none(instance.manageip) + \
        #                 "\npublicip: " + self.check_none(instance.publicip) + \
        #                 "\npublicgateway: " + self.check_none(instance.publicgateway) + \
        #                 "\npublicnetmask: " + self.check_none(instance.publicnetmask) + \
        #                 "\nserviceip: " + self.check_none(instance.serviceip) + \
        #                 "\nstatus: " + self.check_none(instance.status)

        instance_data = dict(
            id=instance.id,
            mac=instance.mac,
            manageip=instance.manageip,
            publicip=instance.publicip,
            publicgateway=instance.publicgateway,
            publicnetmask=instance.publicnetmask,
            serviceip=instance.serviceip,
            status=instance.status,
            user=self.CONST_HOST_MAPPING[service_name],
        )

        self.zk.ensure_path(instance_path)
        # self.zk.set(instance_path, b"" + instance_data.encode('utf8'))
        self.zk.set(instance_path, b"" + json.dumps(instance_data).encode("utf8"))
        return instance_path

    def create_usergroup_path(self, parent_path, group):
        """Write a group node plus its ``users/`` and ``servers/`` subtrees.

        Does nothing when ``group`` or its ``groupname`` is ``None``.
        """
        if group is None or group.groupname is None:
            return

        node = parent_path + group.groupname + "/"
        payload = {
            "id": group.id,
            "groupname": group.groupname,
            "description": group.description,
        }
        self.zk.ensure_path(node)
        self.zk.set(node, json.dumps(payload).encode("utf8"))

        # Members of the group.
        users_root = node + self.CONST_USERS_PATH
        self.zk.ensure_path(users_root)
        for member in group.users:
            self.create_user_path(users_root, member)

        # Servers/hosts attached to the group.
        servers_root = node + self.CONST_SERVERS_PATH
        self.zk.ensure_path(servers_root)
        for machine in group.servers:
            self.create_host_path(servers_root, machine)

    def create_user_path(self, parent_path, user):
        if user is None or user.name is None:
            print "user is None"
            return
        user_path = parent_path + user.name + "/"
        # user_data = "id: " + user.id + "\nname: " + \
        #             user.name + "\nemail: " + self.check_none(user.email)

        user_data = dict(id=user.id, name=user.name, email=user.email)

        self.zk.ensure_path(user_path)
        self.zk.set(user_path, b"" + json.dumps(user_data).encode("utf8"))

        # create hosts path
        hosts_path = user_path + self.CONST_HOSTS_PATH
        self.zk.ensure_path(hosts_path)

        # set each hosts
        for host_item in user.hosts:
            self.create_host_path(hosts_path, host_item)

    def create_host_path(self, parent_path, host):
        if host is None or host.mac is None:
            return

        host_path = parent_path + host.mac

        # host_data = "ip: " + self.check_none(host.ip) + "\nmac: " + \
        #             host.mac + "\nhost_name: " + \
        #             self.check_none(host.mac) + \
        #             "\ndomain: " + self.check_none(host.domain)

        host_data = dict(ip=host.ip, mac=host.mac, host_name=host.host_name, domain=host.domain)

        self.zk.ensure_path(host_path)
        self.zk.set(host_path, b"" + json.dumps(host_data).encode("utf8"))
        return host_path

    def delete_dso_path(self):
        """Recursively delete ``/dso`` and recreate it empty."""
        zk = self.zk
        zk.delete("/dso", recursive=True)
        zk.ensure_path("/dso")

    def delete_account_path(self, account_id):
        """Recursively delete the node of the account ``account_id``."""
        accounts_root = self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH
        self.zk.delete(accounts_root + account_id, recursive=True)

    def delete_account_mapping_path(self, account_id):
        """Recursively delete the account's Ip2Vm, Ip2User and Mac2User nodes."""
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        # Build all three paths before deleting anything, in deletion order.
        targets = [
            mapping_root + segment + account_id
            for segment in (self.CONST_IP2VMPATH,
                            self.CONST_IP2USER_PATH,
                            self.CONST_MAC2USER_PATH)
        ]

        for target in targets:
            self.zk.delete(target, recursive=True)

    @staticmethod
    def check_none(data):
        return "None" if data is None else data

    def create_user2account_path(self, user2account):
        """Store ``user2account`` as JSON on the user2account mapping node."""
        node = "".join(
            (self.CONST_BASE_PATH, self.CONST_MAPPING_PATH, self.CONST_USER2ACCOUNT_PATH)
        )
        self.zk.ensure_path(node)
        self.zk.set(node, json.dumps(user2account).encode("utf8"))

    def create_aid2aname_path(self, aid2aname):
        """Store the account-id to account-name mapping as JSON.

        The mapping root is created first when it does not exist yet; the
        payload is then written to the aid2aname node below it.
        """
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        if not self.zk.exists(mapping_root):
            self.zk.ensure_path(mapping_root)

        node = mapping_root + self.CONST_AID2ANAME_PATH
        self.zk.ensure_path(node)
        self.zk.set(node, json.dumps(aid2aname).encode("utf8"))

    def create_ip2user_path(self, account_info, ip2user):
        """Write the IP-to-user mapping of one account into ZooKeeper.

        Creates ``<base><mapping><ip2user><account id>/`` holding
        ``{"account_name": ...}`` and, below it, one node per key of
        ``ip2user`` whose data is the JSON-encoded value. Entries whose key
        is ``None`` are reported on stdout and skipped.
        """
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        if not self.zk.exists(mapping_root):
            self.zk.ensure_path(mapping_root)

        ip2user_path = mapping_root + self.CONST_IP2USER_PATH
        self.zk.ensure_path(ip2user_path)

        account_path = ip2user_path + account_info.id + "/"
        self.zk.ensure_path(account_path)
        account_data = {"account_name": account_info.account_name}
        self.zk.set(account_path, json.dumps(account_data).encode("utf8"))

        for ip, user_data in ip2user.items():
            if ip is None:
                # Parenthesised form: same output on Python 2, valid on Python 3.
                print("Ip is None")
                continue
            ip_path = account_path + ip

            self.zk.ensure_path(ip_path)
            self.zk.set(ip_path, json.dumps(user_data).encode("utf8"))

    def create_mac2user_path(self, account_info, mac2user):
        """Write the MAC-to-user mapping of one account into ZooKeeper.

        Creates ``<base><mapping><mac2user><account id>/`` holding
        ``{"account_name": ...}`` and, below it, one node per key of
        ``mac2user`` whose data is the JSON-encoded value. Entries whose key
        is ``None`` are reported on stdout and skipped.
        """
        mac2user_path = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH + self.CONST_MAC2USER_PATH
        self.zk.ensure_path(mac2user_path)

        account_path = mac2user_path + account_info.id + "/"
        self.zk.ensure_path(account_path)
        account_data = {"account_name": account_info.account_name}
        self.zk.set(account_path, json.dumps(account_data).encode("utf8"))

        for mac, user_data in mac2user.items():
            if mac is None:
                # Parenthesised form: same output on Python 2, valid on Python 3.
                print("mac is None")
                continue
            mac_path = account_path + mac

            self.zk.ensure_path(mac_path)
            self.zk.set(mac_path, json.dumps(user_data).encode("utf8"))

    def gen_mapping_pre_account(self, account_info, vpn_clients):
        """Build and store the ip2user, mac2user and ip2vm mappings of an account.

        Every host with a non-``None`` ip contributes an ip2user and a
        mac2user entry describing its owning user and group. Every VPN client
        with a non-``None`` ip contributes an ip2user entry keyed by the ip
        with any ``/suffix`` removed.
        """
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        self.zk.ensure_path(mapping_root)
        self.zk.ensure_path(mapping_root + self.CONST_IP2VMPATH)
        self.zk.ensure_path(mapping_root + self.CONST_MAC2USER_PATH)

        user2account = {}
        ip2user = {}
        mac2user = {}
        for group in account_info.groups:
            for user in group.users:
                user2account[user.id] = account_info.id
                for host in user.hosts:
                    owner = {
                        "userid": user.id,
                        "username": user.name,
                        "useremail": user.email,
                        "groupname": group.groupname,
                    }
                    if host.ip is None:
                        continue
                    ip2user[host.ip] = owner
                    mac2user[host.mac] = owner

        # VPN clients only appear in the ip2user mapping.
        for vpn in vpn_clients:
            vpn_owner = {
                "userid": "vpn_user",
                "username": vpn.user_name,
                "useremail": "",
                "groupname": vpn.group,
            }
            vpn_ip = vpn.ip
            if vpn_ip is None:
                continue
            # Keep only the part in front of the first "/".
            ip2user[vpn_ip.split("/")[0]] = vpn_owner

        self.create_ip2user_path(account_info, ip2user)
        self.create_mac2user_path(account_info, mac2user)

        # user2account is collected but not written at the moment:
        # self.create_user2account_path(user2account)

        self.create_ip2vm_path(account_info)

    def gen_vm_path(self, vm_info):
        """Write one JSON node per VM below the VM-info mapping path.

        The node name is the VM's ``manage_ip`` with every ``/`` replaced by
        ``-``; the data holds the VM's ``id``, ``type`` and ``status``.
        """
        mapping_root = self.CONST_BASE_PATH + self.CONST_MAPPING_PATH
        if not self.zk.exists(mapping_root):
            self.zk.ensure_path(mapping_root)

        vms_path = mapping_root + self.CONST_VM_INFO_PATH
        self.zk.ensure_path(vms_path)

        for vm in vm_info:
            node = vms_path + vm.manage_ip.replace("/", "-")
            record = {"id": vm.id, "type": vm.type, "status": vm.status}
            self.zk.ensure_path(node)
            self.zk.set(node, json.dumps(record).encode("utf8"))

    def create_mapping_accounts(self, accounts):
        """Store an ``account.id -> account.account_name`` map for ``accounts``."""
        self.create_aid2aname_path(
            {account.id: account.account_name for account in accounts}
        )

    def gen_services_summay(self, path, services):
        """Store an HTML summary of every service's instances as JSON at ``path``.

        The stored object maps each ``servicename`` to the concatenation of
        one ``<table class="table">`` per instance, each followed by a
        newline. ``None`` field values are rendered as ``"None"`` through
        ``check_none``. The ``user`` row comes from
        ``CONST_HOST_MAPPING[servicename]``.
        """
        services_data = {}
        for service_item in services:
            tables = []
            for instance in service_item.instances:
                rows = [
                    ("manageip", self.check_none(instance.manageip)),
                    ("publicip", self.check_none(instance.publicip)),
                    ("publicgateway", self.check_none(instance.publicgateway)),
                    ("publicnetmask", self.check_none(instance.publicnetmask)),
                    ("serviceip", self.check_none(instance.serviceip)),
                    ("status", self.check_none(instance.status)),
                    ("user", self.check_none(self.CONST_HOST_MAPPING[service_item.servicename])),
                ]
                table = '<table class="table">'
                for label, value in rows:
                    # Plain concatenation on purpose: values are expected to
                    # be strings already.
                    table = table + "<tr><td>" + label + "</td><td>" + value + "</td></tr>"
                table = table + "</table>"
                tables.append(table + "\n")
            services_data[service_item.servicename] = "".join(tables)

        self.zk.set(path, json.dumps(services_data).encode("utf8"))

    def create_ip2vm_path(self, account_info):
        """Create the account's ip2vm node and one child per service instance.

        The account node is created with ``{"accountname": ...}`` as data.
        Each instance with a non-``None`` ``manageip`` gets a child named
        after that ip with anything from the first ``/`` on removed; its data
        is a JSON record of the instance fields plus ``servicename`` and a
        ``hostname`` of the form ``<account id>-<service name>``.
        """
        account_path = (
            self.CONST_BASE_PATH
            + self.CONST_MAPPING_PATH
            + self.CONST_IP2VMPATH
            + account_info.id
            + "/"
        )

        header = {"accountname": account_info.account_name}
        self.zk.create(account_path, json.dumps(header).encode("utf8"))

        self.zk.ensure_path(account_path)
        for service_item in account_info.services:
            service_name = service_item.servicename
            for instance in service_item.instances:
                full_ip = instance.manageip
                if full_ip is None:
                    continue
                # Node name is the address without its "/..." suffix.
                bare_ip = full_ip.split("/", 1)[0]

                record = {
                    "id": instance.id,
                    "mac": instance.mac,
                    "manageip": instance.manageip,
                    "publicip": instance.publicip,
                    "publicgateway": instance.publicgateway,
                    "publicnetmask": instance.publicnetmask,
                    "serviceip": instance.serviceip,
                    "status": instance.status,
                    "servicename": service_name,
                    "hostname": account_info.id + "-" + service_name,
                }
                self.zk.create(account_path + bare_ip, json.dumps(record).encode("utf8"))

    def get_all_account(self):
        """Return the child node names found under the accounts path."""
        return self.zk.get_children(self.CONST_BASE_PATH + self.CONST_ACCOUNTS_PATH)

    def commit(self):
        """Commit the pending transaction held in ``self.tran``."""
        pending = self.tran
        pending.commit()

    def stopZooK(self):
        """Stop the ZooKeeper client; call once all work is finished."""
        client = self.zk
        client.stop()
Beispiel #22
0
class ZkSyncManager(object):
    """Lock manager backed by ZooKeeper.

    Two kinds of locks are offered:

    * ``lock`` - a context manager around the kazoo ``Lock`` recipe, held
      only for the duration of the ``with`` block;
    * persistent locks - plain nodes created under ``lock_path_prefix``
      whose data identifies the holder; they stay until released explicitly.
    """

    RETRIES = 2
    LOCK_TIMEOUT = 3

    def __init__(self,
                 host='127.0.0.1:2181',
                 lock_path_prefix='/mastermind/locks/'):
        """Connect to ``host``; a failure to start the client is logged and re-raised."""
        self.client = KazooClient(host, timeout=3)
        logger.info(
            'Connecting to zookeeper host {}, lock_path_prefix: {}'.format(
                host, lock_path_prefix))
        try:
            self.client.start()
        except Exception as e:
            logger.error(e)
            raise

        self._retry = KazooRetry(max_tries=self.RETRIES)

        self.lock_path_prefix = lock_path_prefix

    @contextmanager
    def lock(self, lockid, blocking=True, timeout=LOCK_TIMEOUT):
        """Hold the lock ``lockid`` for the duration of the ``with`` block.

        Raises:
            LockAlreadyAcquiredError: ``acquire`` returned a false value.
            LockFailedError: ``acquire`` raised ``LockTimeout``.
            LockError: re-raised after being logged.
        """
        lock = Lock(self.client, self.lock_path_prefix + lockid)
        try:
            acquired = lock.acquire(blocking=blocking, timeout=timeout)
            logger.debug('Lock {0} acquired: {1}'.format(lockid, acquired))
            if not acquired:
                # TODO: Change exception time or set all required parameters for
                # this type of exception
                raise LockAlreadyAcquiredError(lock_id=lockid)
            yield
        except LockTimeout:
            logger.info(
                'Failed to acquire lock {} due to timeout ({} seconds)'.format(
                    lockid, timeout))
            raise LockFailedError(lock_id=lockid)
        except LockAlreadyAcquiredError:
            raise
        except LockError as e:
            logger.error('Failed to acquire lock {0}: {1}\n{2}'.format(
                lockid, e, traceback.format_exc()))
            raise
        finally:
            lock.release()

    def persistent_locks_acquire(self, locks, data=''):
        """Create all persistent ``locks`` with ``data`` as holder id, with retries.

        Returns ``True`` on success. Raises ``LockError`` when the retries
        are exhausted or a ``KazooException`` occurs.
        """
        try:
            retry = self._retry.copy()
            result = retry(self._inner_persistent_locks_acquire,
                           locks=locks,
                           data=data)
        except RetryFailedError:
            raise LockError(
                'Failed to acquire persistent locks {} after several retries'.
                format(locks))
        except KazooException as e:
            logger.error(
                'Failed to fetch persistent locks {0}: {1}\n{2}'.format(
                    locks, e, traceback.format_exc()))
            raise LockError
        return result

    def _inner_persistent_locks_acquire(self, locks, data):
        """Create every lock node in one transaction.

        Raises ``LockAlreadyAcquiredError`` when at least one node already
        exists (preferring to report a lock whose stored data differs from
        ``data``), and ``LockError`` for any other failed operation.
        """

        ensured_paths = set()

        tr = self.client.transaction()
        for lockid in locks:
            path = self.lock_path_prefix + lockid
            parts = path.rsplit('/', 1)
            # Parent nodes must exist before the transaction runs; ensure
            # each distinct parent only once.
            if len(parts) == 2 and parts[0] not in ensured_paths:
                self.client.ensure_path(parts[0])
                ensured_paths.add(parts[0])
            tr.create(path, data)

        failed = False
        failed_locks = []
        result = tr.commit()
        for i, res in enumerate(result):
            if isinstance(res, ZookeeperError):
                failed = True
            if isinstance(res, NodeExistsError):
                failed_locks.append(locks[i])

        if failed_locks:
            holders = []
            for f in failed_locks:
                # TODO: fetch all holders with 1 transaction request
                holders.append((f, self.client.get(self.lock_path_prefix + f)))
            foreign_holders = [(l, h) for l, h in holders if h[0] != data]
            # Report a lock held by somebody else if there is one, otherwise
            # the first lock that already existed.
            failed_lock, holder_resp = (
                foreign_holders[0] if foreign_holders else holders[0])
            holder = holder_resp[0]
            holders_ids = list(set([h[0] for _, h in holders]))
            logger.warn('Persistent lock {0} is already set by {1}'.format(
                failed_lock, holder))
            raise LockAlreadyAcquiredError(
                'Lock for {0} is already acquired by job {1}'.format(
                    failed_lock, holder),
                lock_id=failed_lock,
                holder_id=holder,
                lock_ids=failed_locks,
                holders_ids=holders_ids)
        elif failed:
            logger.error(
                'Failed to set persistent locks {0}, result: {1}'.format(
                    locks, result))
            raise LockError

        return True

    def get_children_locks(self, lock_prefix):
        """Return the lock ids below ``lock_prefix``, each prefixed with it.

        Raises ``LockError`` when the retries are exhausted.
        """
        try:
            retry = self._retry.copy()
            result = retry(self.__inner_get_children_locks, lock_prefix)
        except RetryFailedError:
            raise LockError('Failed to get fetch children locks for {}'.format(
                lock_prefix))
        return result

    def __inner_get_children_locks(self, lock_prefix):
        """Ensure the prefix node exists and list its children."""
        full_path = self.lock_path_prefix + lock_prefix
        self.client.ensure_path(os.path.normpath(full_path))
        result = self.client.get_children(full_path)
        return ['{0}{1}'.format(lock_prefix, lock) for lock in result]

    def persistent_locks_release(self, locks, check=''):
        """Delete the persistent ``locks``, with retries.

        When ``check`` is non-empty, each lock's stored data must equal it;
        otherwise ``InconsistentLockError`` is raised. Raises ``LockError``
        when the retries are exhausted or a ``KazooException`` occurs.
        """
        try:
            retry = self._retry.copy()
            result = retry(self.__inner_persistent_locks_release,
                           locks=locks,
                           check=check)
        except RetryFailedError:
            raise LockError
        except KazooException as e:
            logger.error(
                'Failed to remove persistent locks {0}: {1}\n{2}'.format(
                    locks, e, traceback.format_exc()))
            raise LockError
        return result

    def __inner_persistent_locks_release(self, locks, check):
        """Delete each lock node; locks that no longer exist are only logged."""
        for lockid in locks:
            try:
                if check:
                    data = self.client.get(self.lock_path_prefix + lockid)
                    if data[0] != check:
                        # All fields are numbered explicitly: mixing '{0}'
                        # with bare '{}' makes str.format raise ValueError,
                        # which would hide the InconsistentLockError below.
                        logger.error(
                            'Lock {0} has inconsistent data: {1}, expected {2}'.
                            format(lockid, data[0], check))
                        raise InconsistentLockError(lock_id=lockid,
                                                    holder_id=data[0])
                self.client.delete(self.lock_path_prefix + lockid)
            except NoNodeError:
                logger.warn(
                    'Persistent lock {0} is already removed'.format(lockid))
        return True
Beispiel #23
0
class Scheduler:
    """Hands tasks stored under ``/<root>/tasks`` over to agents.

    New task ids are discovered through a children watch on
    ``/<root>/signal``; each task is (re)scheduled whenever its signal node
    changes, until a callback node exists for it.
    """

    def __init__(self, zk_hosts, zk_root):
        self.zk = KazooClient(zk_hosts)
        self.root = zk_root
        self.tasks = set()
        self.event = threading.Event()

    def get_targets(self, task_id):
        """Return ``{target: status}`` for the task, statuses decoded to ``str``."""
        result = {}
        node = '/{}/tasks/{}/targets'.format(self.root, task_id)
        for target in self.zk.get_children(node):
            path = '{}/{}'.format(node, target)
            status, _ = self.zk.get(path)
            result[target] = status.decode()
        return result

    def callback(self, task_id):
        """Create the callback node for ``task_id``."""
        node = '/{}/callback/{}'.format(self.root, task_id)
        self.zk.ensure_path(node)

    def copy_task(self, targets, task):
        """Copy ``task`` to each target's agent queue and mark the target ``W``.

        Both writes for one target are committed in a single transaction.
        """
        for target in targets:
            node = '/{}/agents/{}/tasks/{}'.format(self.root, target,
                                                   task['id'])
            tx = self.zk.transaction()
            tx.create(node, json.dumps(task).encode())
            tx.set_data(
                '/{}/tasks/{}/targets/{}'.format(self.root, task['id'],
                                                 target), b'W')
            tx.commit()

    def schedule(self, task_id):
        """Run one scheduling round for ``task_id`` under the task's lock.

        The round ends with a callback when the task has no targets, when
        the share of ``F`` targets exceeds the task's ``fail_rate``, or when
        every target is in ``F``/``S``/``K``. Otherwise more targets are
        chosen, keeping the number in ``W``/``R`` within ``parallel``.
        A lock timeout skips the round silently.
        """
        node = '/{}/tasks/{}'.format(self.root, task_id)
        lock_node = '{}/lock'.format(node)
        self.zk.ensure_path(lock_node)
        lock = Lock(self.zk, lock_node)
        try:
            if lock.acquire(timeout=1):
                data, _ = self.zk.get(node)
                task = json.loads(data.decode())
                p = task.get('parallel', 1)
                rate = task.get('fail_rate', 0)
                targets = self.get_targets(task_id)
                # Nothing to run: finish right away instead of dividing by
                # zero in the failure-rate check below.
                if not targets:
                    return self.callback(task_id)
                if count(targets, ('F', )) / len(targets) > rate:
                    return self.callback(task_id)
                if count(targets, ('F', 'S', 'K')) == len(targets):
                    return self.callback(task_id)
                wait_schedule = choose(targets, p - count(targets, ('W', 'R')))
                self.copy_task(wait_schedule, task)
        except LockTimeout:
            pass
        finally:
            lock.release()

    def watch_new_task(self, tasks):
        """Children-watch handler: schedule unseen tasks and watch their signal node.

        Returns ``False`` once shutdown has been requested.
        """
        for task_id in set(tasks).difference(self.tasks):
            self.schedule(task_id)
            DataWatch(self.zk, '/{}/signal/{}'.format(self.root, task_id),
                      partial(self.watch_exist_task, task_id=task_id))
        self.tasks = tasks
        return not self.event.is_set()

    def watch_exist_task(self, task_id, *args):
        """Data-watch handler: reschedule until the task has a callback node."""
        if self.zk.exists('/{}/callback/{}'.format(self.root, task_id)):
            return False
        self.schedule(task_id)
        return True

    def watch(self):
        """Install the children watch on ``/<root>/signal``."""
        ChildrenWatch(self.zk, '/{}/signal'.format(self.root),
                      self.watch_new_task)

    def start(self):
        """Start the client, install the watch and block until ``shutdown``."""
        self.zk.start()
        self.watch()
        self.event.wait()

    def shutdown(self):
        """Unblock ``start`` and disconnect from ZooKeeper."""
        self.event.set()
        # close() only frees the resources of a client that has already been
        # stopped, so the session has to be ended with stop() first.
        self.zk.stop()
        self.zk.close()
class ResManager(object):
    '''
    Resource management module.

    Keeps the per-host resource bookkeeping (availableRes, status, ...) in
    ZooKeeper below ``<CONF.host_path>/list`` and allocates / releases
    load-test agents against it. Updates are serialised through a host lock.
    '''

    def __init__(self):
        '''Connect to ZooKeeper and make sure the host list node exists.'''
        self._zk_client = KazooClient(hosts=CONF.zk_address)
        self._zk_client.start()
        CONF.log.debug("zk client started, zk_address = %s"%(CONF.zk_address))
        self.hostRootPath = os.path.join(CONF.host_path,"list")
        # "list" is a path component: it belongs inside os.path.join and must
        # not be handed to ensure_path() as its second (acl) argument.
        self._zk_client.ensure_path(self.hostRootPath)
        self.zkHostLock = ZooKeeperLock(CONF.zk_address,"host_lock",os.path.join(CONF.host_path,"lock"))

    def allocate_agent(self, taskObj, count, agentRes, agentMaxQps):
        '''
        Allocate ``count`` load-test agents for the given task.

        taskObj:        object describing the task (task_id, query_type)
        count:          number of agents to allocate
        agentRes:       resource units one agent occupies
        agentMaxQps:    max QPS of one agent, passed on to taskObj.add_agent

        Raises Exception when the hosts cannot take ``count`` agents; in that
        case nothing is committed to ZooKeeper.
        '''

        CONF.log.info(\
            "[Enter]allocate_agent. taskObj=%s, count=%s, agentRes=%s, agentMaxQps=%s"\
                %(taskObj.__dict__, count, agentRes, agentMaxQps))
        # get task id
        taskId = taskObj.task_id
        queryType = taskObj.query_type

        # The resources and MaxQps of an agent follow from the task's
        # query_type. Constraint: one task only contains a single type of
        # load-test agent (identical resource usage).

        # Fetch the host list (hosts in "error" state are filtered out).
        # read_hosts() takes and releases the host lock on its own.
        hostList = self.read_hosts(nonerr=True)

        # Take the host lock before entering the try block, so the finally
        # clause never releases a lock that was not acquired (same pattern as
        # release_agent).
        self._acquire_host_lock()
        try:
            # Transaction collecting all host node updates
            transaction = self._zk_client.transaction()

            # Agents allocated by this call
            agentAllocateList = []

            # Read lastSeq once up front; it is written back to zk only after
            # all allocations are done.
            lastSeq = self._getLastSeq()

            # Breadth-first allocation
            while count > 0:
                # Hosts that currently have the most available resources
                mostResourcefulHostList = self._select_most_resourceful_hosts(hostList)

                # No usable host at all: fall through to the
                # "insufficient resources" error below.
                if not mostResourcefulHostList:
                    break

                # If even the most resourceful host cannot fit agentRes, the
                # allocation fails.
                if int(mostResourcefulHostList[0].availableRes) < agentRes:
                    CONF.log.debug("最多resource的host依然无法满足agentRes则分配失败(availableRes=%s,agnetRes=%s)"\
                        %(int(mostResourcefulHostList[0].availableRes),agentRes))

                    break

                for hostObj in mostResourcefulHostList:
                    hostIp = hostObj.ip
                    availableRes = int(hostObj.availableRes)

                    # The host has enough resources: place one agent on it.
                    if availableRes >= agentRes:
                        lastSeq += 1
                        agentId = "%s_%s"%(hostIp,lastSeq)

                        CONF.log.info("allocate one agent on %s. (availableRes=%s, agentRes=%s)"%(hostIp,availableRes,agentRes))
                        agentAllocateList.append({"hostIp":hostIp, "agentId":agentId, "agentRes":agentRes, "taskId":taskId, "queryType":queryType})

                        availableRes -= agentRes
                        hostObj.availableRes = str(availableRes)
                        hostObj.status = "occupied"

                        count -= 1

                        # Queue the zk update (availableRes, status)
                        availableResPath = os.path.join(self.hostRootPath,hostIp,"availableRes")
                        statusPath = os.path.join(self.hostRootPath,hostIp,"status")

                        self._zk_client.ensure_path(availableResPath)
                        self._zk_client.ensure_path(statusPath)

                        transaction.set_data(availableResPath,hostObj.availableRes)
                        transaction.set_data(statusPath,hostObj.status)

                        # Allocation complete
                        if count == 0:
                            break

            # Insufficient resources
            if count > 0:
                raise Exception("资源不足(taskObj = %s, count = %d)"%(taskObj.__dict__, count))

            # Allocation succeeded: queue the zk update of lastSeq
            lastSeqPath = os.path.join(CONF.host_path,"lastSeq")
            self._zk_client.ensure_path(lastSeqPath)
            transaction.set_data(lastSeqPath,str(lastSeq))

        finally:
            # Release the host lock
            self._release_host_lock()

        # Let the task management module record the task's resources
        for agentObj in agentAllocateList:
            taskObj.add_agent(agentObj.get("hostIp"),agentObj.get("agentId"),agentMaxQps,agentRes)

        # Run the transaction to update the host zk nodes
        transaction.commit()

        # Initialise AgentCtl and start the agents
        for agentObj in agentAllocateList:
            agentCtl = AgentCtl(agentObj.get("hostIp"),agentObj.get("agentId"),agentObj.get("taskId"), agentObj.get("queryType"))
            agentCtl.create()

        CONF.log.debug("[Exit]allocate_agent. taskObj=%s, count=%s, agentRes=%s, agentMaxQps=%s"\
                        %(taskObj.__dict__, count, agentRes, agentMaxQps))

    def _select_most_resourceful_hosts(self,hostlist):
        '''
        utility function.
        Pick the hosts with the largest availableRes from hostlist.

        Input:
            hostlist:   list of hostObj
        Output:
            the hostObj entries of hostlist that have the largest
            availableRes (a list, because several hosts may share the same
            value); an empty list when hostlist is empty
        '''

        # Nothing to choose from; also avoids indexing an empty list below.
        if not hostlist:
            return []

        CONF.log.debug("[Enter] _select_most_resourceful_hosts(), hostlist[0] = %s(type=%s,len=%d)"%(hostlist[0].__dict__,type(hostlist),len(hostlist)))

        # 1. Sort by availableRes
        sortedHostList = sorted(hostlist,key=lambda host:int(host.availableRes), reverse=True)

        # 2. Return the leading run of hosts sharing the largest availableRes
        maxAvailableRes = int(sortedHostList[0].availableRes)
        for idx, hostObj in enumerate(sortedHostList):
            if int(hostObj.availableRes) < maxAvailableRes:
                return sortedHostList[:idx]

        return sortedHostList

    def _getLastSeq(self):
        '''Return the last agent sequence number stored in ZooKeeper.'''
        # Read the same node that allocate_agent() writes, rather than a
        # hard-coded "/acp/host/lastSeq".
        path = os.path.join(CONF.host_path,"lastSeq")
        result = self._zk_client.get(path)[0]
        return int(result)

    def getHostStatus(self,hostIp):
        '''Return the raw data of the host's "status" node.'''
        status_path = os.path.join(self.hostRootPath,hostIp,"status")
        return self._zk_client.get(status_path)[0]

    def release_agents_for_task(self, taskObj, count):
        '''
        Release the given number of agents of the given task.

        taskObj:    object describing the task
        count:      number of agents to release

        Raises Exception when count exceeds the number of agents of the task.
        '''

        CONF.log.debug("[Enter]release_agent_for_task: %s"%(taskObj.__dict__))
        agentsForTask = taskObj.get_all_agent()
        if count > len(agentsForTask):
            raise Exception("count > number of agents for task: %s"%(str(taskObj)))

        # Pick the first `count` agents for release
        agentsToBeReleased = agentsForTask[:count]
        for agent in agentsToBeReleased:
            self.release_agent(agent)
        CONF.log.debug("[Exit]release_agent_for_task: %s"%(taskObj.__dict__))

    def release_all_agents(self, taskObj):
        '''Release every agent of the given task.'''
        agentsForTask = taskObj.get_all_agent()
        for agent in agentsForTask:
            self.release_agent(agent)

    def release_agent(self, agentObj):
        '''
        Release the given agent and hand its resources back to its host.
        '''
        CONF.log.debug("[Enter]release_agent: %s"%(agentObj.__dict__))
        agentCtl = AgentCtl(agentObj.host,agentObj.agent_id,agentObj.task_id,agentObj.query_type)

        # Tell the agent daemon to stop the agent
        agentCtl.delete()

        # Take the lock, then update the host node (availableRes)
        self._acquire_host_lock()
        try:
            # New availableRes value
            availableResPath = os.path.join(self.hostRootPath,agentCtl.hostIp,"availableRes")
            currentValue = int(self._zk_client.get(availableResPath)[0])
            agentRes = int(agentObj.resource_num)
            currentValue += agentRes

            transaction = self._zk_client.transaction()

            # Fetch totalRes
            totalResPath = os.path.join(self.hostRootPath,agentCtl.hostIp,"totalRes")
            totalRes = int(self._zk_client.get(totalResPath)[0])

            # Update availableRes on zk
            transaction.set_data(availableResPath,str(currentValue))

            # Once all resources of the host are free again, mark it idle
            if currentValue == totalRes:
                statusPath = os.path.join(self.hostRootPath,agentCtl.hostIp,"status")
                CONF.log.debug("release_agent() set host status to idle")
                transaction.set_data(statusPath,"idle")

            # Tell the task manager to drop the agent's resources
            Agent.delete_agent(agentObj)
            transaction.commit()
            CONF.log.debug("[Exit]release_agent(), reset availableRes data (path=%s, value=%s)"%(availableResPath,str(currentValue)))
        finally:
            # Release the lock
            self._release_host_lock()

    def read_hosts(self,filter=None,nonerr=False):
        '''
        Read the hosts stored below the host root node.

        filter:     optional dict; only hosts whose attributes contain all of
                    its items are returned
        nonerr:     when True, hosts whose status is "error" are skipped

        Returns a (possibly empty) list of Host objects carrying one
        attribute per child node of the host.
        '''
        CONF.log.debug("[Enter]read_hosts()")
        # Take the lock before the try block so the finally clause only
        # releases a lock that was actually acquired.
        self._acquire_host_lock()
        try:
            # All host root nodes
            hostIpList = self._zk_client.get_children(self.hostRootPath)

            ret = []
            for hostIp in hostIpList:
                # Build a hostObj
                hostObj = Host(hostIp)

                hostpath = os.path.join(self.hostRootPath,hostIp)
                hostAttrList = self._zk_client.get_children(hostpath)

                for attr in hostAttrList:
                    attrpath = os.path.join(self.hostRootPath,hostIp,attr)
                    value = self._zk_client.get(attrpath)[0]
                    setattr(hostObj,attr,value)
                    CONF.log.debug("setattr: %s=%s"%(attr,value))

                # Skip hosts in error state
                if nonerr and hostObj.status == "error":
                    continue

                # Add the hostObj to the result when it matches the filter
                if filter is None or set(filter.items()).issubset(set(hostObj.__dict__.items())):
                    ret.append(hostObj)

            # An empty result is valid; do not index ret[0] in that case.
            firstHost = ret[0].__dict__ if ret else None
            CONF.log.debug("[Exit]read_hosts(), ret = %s, len=%d, ret[0] = %s"%(ret,len(ret),firstHost))
            return ret
        finally:
            # Release the lock
            self._release_host_lock()

    def add_hosts(self,hostList):
        '''
        Create the zk nodes of every host in hostList.

        hostObj: {
            "ip":"1.1.1.1", "totalRes": 4,
            "availableRes": 4, "status": idle
        }

        Raises Exception when a host node already exists.
        '''

        CONF.log.info("[Enter]add_hosts()")
        # Take the lock before the try block (see read_hosts)
        self._acquire_host_lock()
        try:
            for hostObj in hostList:
                # Create the host root node
                hostPath = os.path.join(self.hostRootPath,hostObj.ip)
                if self._zk_client.exists(hostPath):
                    raise Exception("The zk path \"%s\" already exists"%(hostPath))

                transaction = self._zk_client.transaction()
                transaction.create(hostPath)

                # Create one node per host attribute
                for k,v in hostObj.__dict__.items():
                    keyPath = os.path.join(hostPath,k)
                    transaction.create(keyPath)
                    transaction.set_data(keyPath,str(v))

                transaction.commit()
            CONF.log.info("[Exit]add_hosts()")
        finally:
            # Release the lock
            self._release_host_lock()

    def del_hosts(self,hostList):
        '''
        Recursively delete the zk nodes of every host in hostList.

        Raises Exception when a host node does not exist.
        '''
        CONF.log.info("[Enter]del_hosts()")
        # Take the lock before the try block (see read_hosts)
        self._acquire_host_lock()
        try:
            for hostObj in hostList:
                # Delete the host root node
                hostPath = os.path.join(self.hostRootPath,hostObj.ip)
                if not self._zk_client.exists(hostPath):
                    raise Exception("the zk path \"%s\" does not exist."%(hostPath))

                self._zk_client.delete(hostPath,recursive=True)
            CONF.log.info("[Exit]del_hosts()")
        finally:
            # Release the lock
            self._release_host_lock()

    def _acquire_host_lock(self):
        '''Acquire the host lock; raises Exception when acquire() reports failure.'''
        ret = self.zkHostLock.acquire()
        if not ret:
            raise Exception("acquire host lock failed.")

    def _release_host_lock(self):
        '''Release the host lock.'''
        self.zkHostLock.release()

    def stopZkClient(self):
        '''Stop the zk client, if any, and forget it.'''
        CONF.log.debug("[Enter]stopZkClient()")
        if self._zk_client is not None:
            self._zk_client.stop()
        self._zk_client = None

    def __del__(self):
        #self.stopZkClient()
        pass
Beispiel #25
0
class ArcusZooKeeper:
    """ZooKeeper helper for Arcus.

    Wraps a ``KazooClient`` and manages the ``/arcus`` tree:

    * ``/arcus/cache_list/<serviceCode>`` - JSON cluster definition
    * ``/arcus/client_list/<serviceCode>`` - empty marker node
    * ``/arcus/cache_server_mapping/<ip:port>/<serviceCode>`` - per-server
      JSON config with one child naming the owning service code

    All ``print`` calls use the single-argument parenthesised form, which
    produces the same output on Python 2 and Python 3.
    """

    def __init__(self, hostports, timeout):
        """Create (but do not start) the client.

        Args:
            hostports: ZooKeeper connection string passed to ``KazooClient``.
            timeout: stored on the instance only; it is not forwarded to
                ``KazooClient`` here.
        """
        self.hostports = hostports
        self.timeout = timeout
        self.zk = KazooClient(hosts=hostports, read_only=False)

    @staticmethod
    def _encode(obj):
        """Serialise ``obj`` to JSON bytes (ZooKeeper node values are bytes)."""
        return json.dumps(obj).encode('utf-8')

    def start(self):
        """Start the underlying client."""
        self.zk.start()

    def stop(self):
        """Stop the underlying client."""
        self.zk.stop()

    def init_structure(self):
        """Create the empty ``/arcus`` skeleton in one transaction.

        Returns:
            True when every operation of the transaction succeeded, False
            when ``/arcus`` already exists or any operation failed.
        """
        if self.zk.exists('/arcus'):
            print('init_arcus_structure: fail (/arcus exists)')
            return False

        tx = self.zk.transaction()
        tx.create('/arcus', b'')
        tx.create('/arcus/cache_list', b'')
        tx.create('/arcus/client_list', b'')
        tx.create('/arcus/cache_server_mapping', b'')
        results = tx.commit()

        # commit() returns one entry per operation; a failed operation is
        # reported as an exception instance, so a non-empty list alone does
        # not mean failure.
        if any(isinstance(outcome, Exception) for outcome in results):
            print(results)
            return False

        print('init_structure: success')
        return True

    def drop_structure(self):
        """Recursively delete ``/arcus``."""
        self.zk.delete('/arcus', recursive=True)
        print('delete_structure: success')

    def get_structure(self):
        """Return the names of the direct children of ``/arcus``."""
        return self.zk.get_children('/arcus')

    def get_mapping_for_service(self, service_code):
        """Return the mapping node paths that belong to ``service_code``.

        A mapping node belongs to the service when its first child is named
        ``service_code``.

        Returns:
            List of ``/arcus/cache_server_mapping/<ip:port>`` paths.
        """
        mapping = '/arcus/cache_server_mapping'
        result = []
        for ipport in self.zk.get_children(mapping):
            node = '%s/%s' % (mapping, ipport)
            codes = self.zk.get_children(node)
            if codes and codes[0] == service_code:
                result.append(node)
        return result

    def get_config_for_service(self, service_code):
        """Read the cluster definition of ``service_code``.

        Returns:
            Tuple ``(parsed_json, raw_data, stat)``.
        """
        cache_list = '/arcus/cache_list/%s' % service_code
        data, stat = self.zk.get(cache_list)
        return json.loads(data), data, stat

    def update_service_code(self, cluster):
        """Create or replace the definition and server mappings of a cluster.

        ``cluster`` must contain ``serviceCode`` and ``servers``; each server
        needs ``ip`` and a merged config (cluster-level ``config`` overridden
        by the server's own ``config``) providing ``port``. ``cluster`` is
        mutated: ``created`` / ``modified`` timestamps are written into it.

        Everything runs in one transaction. Errors raised while building or
        committing it are printed with ``traceback.print_exc()`` and not
        re-raised; a missing ``serviceCode`` key raises ``KeyError``.
        """
        service_code = cluster['serviceCode']
        cache_list = '/arcus/cache_list/%s' % service_code
        client_list = '/arcus/client_list/%s' % service_code

        try:
            delete_list = self.get_mapping_for_service(service_code)

            # 0. Create a transaction
            tx = self.zk.transaction()

            # 1. Cache list
            if self.zk.exists(cache_list):
                previous = self.get_config_for_service(service_code)[0]
                cluster['created'] = previous.get('created')
                cluster['modified'] = str(datetime.datetime.now())
                tx.set_data(cache_list, self._encode(cluster))
            else:
                cluster['created'] = str(datetime.datetime.now())
                tx.create(cache_list, self._encode(cluster))

            # 2. Client list
            if not self.zk.exists(client_list):
                tx.create(client_list, b'')

            # 3. Mapping: drop the old entries, then re-create from `servers`
            for each in delete_list:
                tx.delete('%s/%s' % (each, service_code))
                tx.delete(each)

            for server in cluster['servers']:
                # Per-server settings override the cluster-wide ones.
                config = dict(cluster.get('config', {}))
                config.update(server.get('config', {}))

                if len(config) == 0:
                    print('update_service_code: config not found for {0}'.format(
                        server))
                    continue

                map_ip = '/arcus/cache_server_mapping/%s:%s' % (server['ip'],
                                                                config['port'])
                map_code = '%s/%s' % (map_ip, service_code)

                tx.create(map_ip, self._encode(config))
                tx.create(map_code, b'')

            # 4. Commit
            results = tx.commit()
            print(results)
        except Exception:
            traceback.print_exc()

    def delete_service_code(self, cluster):
        """Delete the definition, client node and mappings of a cluster.

        Runs in one transaction; errors are printed with
        ``traceback.print_exc()`` and not re-raised. A missing
        ``serviceCode`` key raises ``KeyError``.
        """
        service_code = cluster['serviceCode']

        try:
            delete_list = self.get_mapping_for_service(service_code)

            # 0. Create a transaction
            tx = self.zk.transaction()

            # 1. Cache list
            tx.delete('/arcus/cache_list/%s' % service_code)

            # 2. Client list
            tx.delete('/arcus/client_list/%s' % service_code)

            # 3. Mapping
            for each in delete_list:
                tx.delete('%s/%s' % (each, service_code))
                tx.delete(each)

            # 4. Commit
            results = tx.commit()
            print(results)
        except Exception:
            traceback.print_exc()

    def list_all_service_code(self):
        """Return ``list_service_code()`` for every registered service code.

        Returns:
            List of per-service dicts, or None when listing the children of
            ``/arcus/cache_list`` fails (the traceback is printed).
        """
        try:
            codes = self.zk.get_children('/arcus/cache_list')
            return [self.list_service_code(code) for code in codes]
        except Exception:
            traceback.print_exc()

    def list_service_code(self, service_code):
        """Describe one service: its config and which servers are online.

        ``online`` are mapped servers that also appear (as the part before
        the first ``-``) among the children of the cache_list node,
        ``offline`` are mapped servers that do not, and ``undefined`` are
        children with no mapping. The three lists are sorted.

        Returns:
            Dict with keys ``serviceCode``, ``config``, ``online``,
            ``offline``, ``undefined``, ``created``, ``modified``; or None
            when any step fails (the traceback is printed).
        """
        result = {}
        cache_list = '/arcus/cache_list/%s' % service_code

        try:
            data, stat = self.zk.get(cache_list)
            static_list = self.get_mapping_for_service(service_code)
            current_list = self.zk.get_children(cache_list)

            # get clusterConfig
            cluster = json.loads(data)

            # get clusterStatus
            static_set = {each.split('/')[-1] for each in static_list}
            current_set = {each.split('-')[0] for each in current_list}
            offline = static_set - current_set
            online = static_set - offline
            undefined = current_set - static_set

            result['serviceCode'] = service_code
            result['config'] = cluster
            result['online'] = sorted(online)
            result['offline'] = sorted(offline)
            result['undefined'] = sorted(undefined)
            result['created'] = cluster.get('created')
            result['modified'] = cluster.get('modified')
            return result

        except Exception:
            traceback.print_exc()
Beispiel #26
0
class TrainCBox(object):
    """A "box" worker that coordinates training tasks via ZooKeeper and MySQL.

    Each box owns the ZooKeeper node ``/<type>/<vm name>/<type><name>`` whose
    data is a dict literal such as ``{"Target":"Null"}``; a ``Target`` of
    ``'Null'`` marks the box as idle. Task targets are strings of the form
    ``<kb>_<suffix>`` built from MySQL rows by ``startMonitorMySQL``.

    Judging by the log messages, the MySQL status codes are 0 = untrained,
    3 = assigned to a box, 1 = training, 2 = trained, and the 4-tuples passed
    to ``mymysql.myupdate`` are ``(new_status, kb, suffix, old_status)`` -
    NOTE(review): confirm against the ``mymysql`` helpers.

    NOTE(security): node payloads are parsed with ``eval()`` throughout this
    class. Anyone able to write to these ZooKeeper nodes can execute
    arbitrary code in this process; the payloads are JSON-shaped, so
    ``json.loads`` looks like a drop-in replacement - confirm with all
    producers before switching.
    """

    def __init__(self, type, name):
        """Record the identity and node paths of the box; opens no connection.

        Args:
            type: box type prefix; ``takeMinAMaster`` only acts for ``'A'``.
            name: box id, must be accepted by ``int()``.
        """
        self._Type = type
        self._Name = "%s%s" % (type, str(name))
        self._VName = HOST['name']
        self._Path = "/%s/%s/%s" % (self._Type, self._VName, self._Name)
        self._ZKHost = ZKSERVERS['hosts']
        self._IP = HOST['ip']
        self._Port = HOST['port']
        self._ID = int(name)
        self._ZK = None
        self._Chatbot = None
        self._Current_mina_master = ''
        self._Master_path = '/MinAMaster/%s' % self._VName
        self._MonitorRunning = False
        self._MinARunning = False
        self._Conn = None
        logger.debug('create a %s box named %s in VM %s.' %
                     (self._Type, self._Name, self._VName))

    def connectZK(self):
        """Create a new ``KazooClient``; ``startZK`` actually connects it."""
        self._ZK = KazooClient(hosts=self._ZKHost)
        logger.info('%s is connecting ZK server.' % self._Path)

    def getType(self):
        """Return the box type prefix."""
        return self._Type

    def getName(self):
        """Return the box name (``<type><name>``)."""
        return self._Name

    def getVName(self):
        """Return the VM name taken from ``HOST['name']``."""
        return self._VName

    def setZK(self, zk):
        """Inject the ZooKeeper client to use."""
        self._ZK = zk

    def getZK(self):
        """Return the current ZooKeeper client (may be ``None``)."""
        return self._ZK

    def startZK(self):
        """Start the ZooKeeper client."""
        self._ZK.start()
        logger.debug(
            'start one connection with ZK server by a %s box named %s in VM %s'
            % (self._Type, self._Name, self._VName))

    def stopZK(self):
        """Stop the ZooKeeper client."""
        self._ZK.stop()
        logger.debug(
            'stop connection with ZK server by a %s box named %s in VM %s' %
            (self._Type, self._Name, self._VName))

    def addZKListener(self):
        """Register a connection-state listener on the client.

        LOST restarts the client, SUSPENDED builds a new client and starts
        it, any other state calls ``startMonitor``.
        NOTE(review): every transition to a non-LOST/SUSPENDED state
        registers another DataWatch via ``startMonitor`` - confirm this is
        intended.
        """
        def my_listenser(state):
            if state == KazooState.LOST:
                self.startZK()
            elif state == KazooState.SUSPENDED:
                self.connectZK()
                self.startZK()
            else:
                self.startMonitor()

        self._ZK.add_listener(my_listenser)

    def startMonitor(self):
        """Watch this box's node and train whatever target gets assigned.

        When the node data names a target other than ``'Null'`` whose MySQL
        status is 3, the status is moved 3 -> 1, ``trainkb`` runs, and the
        status becomes 2 on success or 0 on failure. The node is then reset
        to idle and the MySQL connection is closed.
        """
        #self._MonitorRunning = True
        # scribe Node Data Changes
        @self._ZK.DataWatch(self._Path)
        def watch_node_data_change(data, stat, path):
            #parse data
            #update the corresponding training status of train-task-sheet in MySQL database
            #you can invoke other class to complete this task above
            #then you also continue to instance a training chatterbot object for doing the key train task really

            if data:
                # NOTE(security): eval() on ZooKeeper data, see class docstring.
                temp_str = eval(data.decode("utf-8"))['Target']
                if self._Conn is None:
                    self._Conn = mymysql.myconnect(KBDATABASES)
                param = (temp_str.split("_")[0], temp_str.split("_")[-1], 0)
                temp_status = mymysql.myselectstatus(self._Conn, param)
                # logger.error(temp_status)
                if operator.ne(temp_str, 'Null') and temp_status[0] == 3:
                    logger.info('Watch one node %s with data %s is not Null' %
                                (self._Path, temp_str))
                    param = (1, temp_str.split("_")[0],
                             temp_str.split("_")[-1], 3)
                    paramkg = (1, temp_str.split("_")[0],
                               temp_str.split("_")[-1], 0)
                    logger.debug('untrained kb %s is ready to train in mysql' %
                                 temp_str.split("_")[0])
                    if mymysql.myupdate(self._Conn, param):
                        logger.info(
                            'untrained kb %s in mysql turns from status 3 to 1.'
                            % temp_str.split("_")[0])
                        mymysql.myupdatekg(self._Conn, paramkg)
                        if self.trainkb(temp_str):
                            param = (2, temp_str.split("_")[0],
                                     temp_str.split("_")[-1], 1)
                            if mymysql.myupdate(self._Conn, param):
                                logger.info(
                                    'success: trained kb %s in mysql turns from status 1 to 2.'
                                    % temp_str.split("_")[0])
                                mymysql.myupdatekg(self._Conn, param)
                        else:
                            param = (0, temp_str.split("_")[0],
                                     temp_str.split("_")[-1], 1)
                            if mymysql.myupdate(self._Conn, param):
                                logger.info(
                                    'failure: untrained kb %s in mysql turns from status 1 to 0.'
                                    % temp_str.split("_")[0])
                                mymysql.myupdatekg(self._Conn, param)

                    # Mark the box idle again and drop the connection; this
                    # happens only when a target was actually processed.
                    self.deleteZKnodedata()
                    mymysql.myclose(self._Conn)
                    self._Conn = None

    def addtraintaskMonitor(self):
        """Process the target currently stored on this box's node, once.

        Same flow as the watcher in ``startMonitor`` but driven by a direct
        read. If the node does not exist it is created via
        ``InitialABOXNode``. The MySQL connection is closed whenever the
        node carried data, whether or not a target was processed.
        """
        # monitor Node Data Changes
        if self._ZK.exists(self._Path):
            data, _ = self._ZK.get(self._Path)
            logger.debug('Monitor one A Box node with data: %s, path: %s' %
                         (data.decode("utf-8"), self._Path))
            if data:
                # NOTE(security): eval() on ZooKeeper data, see class docstring.
                temp_str = eval(data.decode("utf-8"))['Target']
                if self._Conn is None:
                    self._Conn = mymysql.myconnect(KBDATABASES)
                param = (temp_str.split("_")[0], temp_str.split("_")[-1], 0)
                temp_status = mymysql.myselectstatus(self._Conn, param)
                if operator.ne(temp_str, 'Null') and temp_status[0] == 3:
                    logger.debug('Watch one node %s with data %s is not Null' %
                                 (self._Path, temp_str))
                    param = (1, temp_str.split("_")[0],
                             temp_str.split("_")[-1], 3)
                    paramkg = (1, temp_str.split("_")[0],
                               temp_str.split("_")[-1], 0)
                    logger.info('untrained kb %s is ready to train in mysql' %
                                temp_str.split("_")[0])
                    if mymysql.myupdate(self._Conn, param):
                        logger.info(
                            'untrained kb %s in mysql turns from status 3 to 1.'
                            % temp_str.split("_")[0])
                        mymysql.myupdatekg(self._Conn, paramkg)
                        if self.trainkb(temp_str):
                            param = (2, temp_str.split("_")[0],
                                     temp_str.split("_")[-1], 1)
                            if mymysql.myupdate(self._Conn, param):
                                logger.info('success:  from status 1 to 2.')
                                mymysql.myupdatekg(self._Conn, param)
                        else:
                            param = (0, temp_str.split("_")[0],
                                     temp_str.split("_")[-1], 1)
                            if mymysql.myupdate(self._Conn, param):
                                logger.info('failure: from status 1 to 0.')
                                mymysql.myupdatekg(self._Conn, param)
                    self.deleteZKnodedata()
                mymysql.myclose(self._Conn)
                self._Conn = None
        else:
            self.InitialABOXNode()

    def addMinAMasterMonitor(self):
        """Ensure the master node exists with no data and watch it.

        Whenever the watcher sees ``data is None`` it clears the remembered
        master and calls ``searchIdleAwithId`` to start a new election.
        """
        #self._MonitorRunning = True
        # scribe Master Node Data Removed
        if self._ZK.exists(self._Master_path) is None:
            self._ZK.create(self._Master_path,
                            None,
                            None,
                            ephemeral=False,
                            sequence=False,
                            makepath=True)
        else:
            self._ZK.set(self._Master_path, None)

        @self._ZK.DataWatch(self._Master_path)
        def watch_node_data_removed(data, stat, path):
            if data is None:  # show this is remove event
                self._Current_mina_master = ''
                self.searchIdleAwithId()

    def searchIdleAwithId(self):
        """Elect the idle box with the smallest id as master.

        Scans the sibling boxes, finds the minimum numeric id (the part of
        the child name after the last ``A``) among idle ones, and if it
        equals this box's id tries ``takeMinAMaster`` followed by
        ``processTrainTaskAssign``.
        """
        # List all children of A type BOX under the condition of zk connection
        children = self._ZK.get_children("/%s/%s" % (self._Type, self._VName))
        if children is not None:
            temp_mina = sys.maxsize
            logger.debug("search idle A box %d with names %s" %
                         (len(children), children))
            random.shuffle(children)
            for child in children:
                child_path = "/%s/%s/%s" % (self._Type, self._VName, child)
                data, stat = self._ZK.get(child_path)
                # NOTE(security): eval() on ZooKeeper data, see class docstring.
                if eval(data.decode("utf-8"))['Target'] == 'Null':
                    temp = child.split("A")[-1]
                    if temp_mina > int(temp):
                        temp_mina = int(temp)
            logger.debug("mina=%d, id = %d" % (temp_mina, self._ID))
            if temp_mina == self._ID:
                if self.takeMinAMaster():
                    logger.info("MinA = %d, Node = %s" %
                                (temp_mina, self._Name))
                    self.processTrainTaskAssign()

    def searchIdleAboxassign(self):
        """Collect all idle sibling boxes and hand them to ``processtraintask``."""
        box_list = self._ZK.get_children("/%s/%s" % (self._Type, self._VName))
        idle_box = []
        random.shuffle(box_list)
        for box in box_list:
            node = "/%s/%s/%s" % (self._Type, self._VName, box)
            data, _ = self._ZK.get(node)
            # NOTE(security): eval() on ZooKeeper data, see class docstring.
            if eval(data.decode("utf-8"))['Target'] == 'Null':
                idle_box.append(box)
                logger.debug('idle A box is %s.' % box)
        logger.info(idle_box)
        if idle_box:
            self.processtraintask(idle_box)

    def processtraintask(self, box):
        """Assign pending MySQL tasks to the idle boxes in ``box``.

        Polls every 10 seconds until every box in the list has received a
        task; ``box`` is consumed in place. Returns True once it is empty.

        Args:
            box: non-empty list of idle box names.
        """
        while True:
            datas = self.startMonitorMySQL()
            if len(datas):
                for aboxdata in datas:
                    oneabox = box[0]
                    param = (3, aboxdata.split("_")[0],
                             aboxdata.split("_")[-1], 0)
                    logger.debug(
                        'untrained kb %s is ready for assigned in a A idle box called %s'
                        % (aboxdata.split("_")[0], oneabox))
                    if mymysql.myupdate(self._Conn, param):
                        label = self.assignOneTrainTasktoABox(
                            oneabox, aboxdata)
                        if not label:
                            # Assignment failed: roll the status back 3 -> 0.
                            param = (0, aboxdata.split("_")[0],
                                     aboxdata.split("_")[-1], 3)
                            mymysql.myupdate(self._Conn, param)
                            logger.info(
                                'failure for update training mission %s. so from status 3 to 0.'
                                % aboxdata.split("_")[0])
                        else:
                            box.pop(0)
                            logger.info(
                                'success for update training mission %s from status 0 to 3.'
                                % aboxdata.split("_")[0])

                    if not box:
                        mymysql.myclose(self._Conn)
                        self._Conn = None
                        logger.debug('delete ZK node %s' % self._Master_path)
                        return True
            time.sleep(10)

    def takeMinAMaster(self):
        """Try to become the MinA master by creating the master node.

        Only boxes of type ``'A'`` take part; for any other type nothing
        happens and ``None`` is returned.

        Returns:
            True when this box created the master node; False when the node
            already exists (the current master's name is then read from the
            node and remembered).

        Raises:
            Any other error from the ZooKeeper client is propagated instead
            of being reported as a successful takeover.
        """
        if self._Type == 'A':
            address = "{\"Name\":\"%s\"}" % self._Name
            address = address.encode('utf-8')
            # Prefer the attribute this code has always used (works for an
            # injected client); fall back to kazoo's canonical class.
            node_exists = getattr(self._ZK, 'NodeExistsError', None)
            if node_exists is None:
                from kazoo.exceptions import NodeExistsError as node_exists
            try:
                self._ZK.create(self._Master_path,
                                address,
                                None,
                                ephemeral=False,
                                sequence=False,
                                makepath=True)
            except node_exists:
                data, stat = self._ZK.get(self._Master_path)
                if data:
                    # NOTE(security): eval() on ZooKeeper data, see class
                    # docstring.
                    self._Current_mina_master = eval(
                        data.decode("utf-8"))['Name']
                logger.debug('current mina is %s' % self._Current_mina_master)
                return False
            else:
                # This used to be a ``finally`` whose ``return True``
                # overrode the ``return False`` above and swallowed every
                # other exception, so every box believed it was the master.
                self._Current_mina_master = self._Name
                logger.debug('current mina is itself i.e %s.' % self._Name)
                return True

    def processTrainTaskAssign(self):
        """Master loop: hand pending MySQL tasks to idle boxes.

        Polls every 10 seconds. A task goes to another idle box when one
        exists; otherwise the master assigns it to itself, deletes the
        master node and returns True.
        """
        while True:
            datas = self.startMonitorMySQL()
            if len(datas):
                for aboxdata in datas:
                    oneabox = self.findOneIdleABox()
                    if oneabox == '':
                        # No other idle box: take the task ourselves.
                        param = (3, aboxdata.split("_")[0],
                                 aboxdata.split("_")[-1], 0)
                        logger.info(
                            'untrained kb %s is ready for assigned in min A box called %s'
                            % (aboxdata.split("_")[0], self._Name))
                        if mymysql.myupdate(self._Conn, param):
                            logger.info(
                                'success for update training mission %s from status 0 to 3.'
                                % aboxdata.split("_")[0])
                            if self.assignOneTrainTasktoABox(
                                    self._Name, aboxdata):
                                mymysql.myclose(self._Conn)
                                self._Conn = None
                                self._ZK.delete(self._Master_path,
                                                recursive=True)
                                logger.debug('delete ZK node %s' %
                                             self._Master_path)
                                return True
                            else:
                                param = (0, aboxdata.split("_")[0],
                                         aboxdata.split("_")[-1], 3)
                                mymysql.myupdate(self._Conn, param)
                                logger.info(
                                    'failure for update training mission %s. so from status 3 to 0.'
                                    % aboxdata.split("_")[0])
                    else:
                        param = (3, aboxdata.split("_")[0],
                                 aboxdata.split("_")[-1], 0)
                        logger.info(
                            'untrained kb %s is ready for assigned in a A idle box called %s'
                            % (aboxdata.split("_")[0], oneabox))
                        if mymysql.myupdate(self._Conn, param):
                            logger.info(
                                'success for update training mission %s from status 0 to 3.'
                                % aboxdata.split("_")[0])
                            label = self.assignOneTrainTasktoABox(
                                oneabox, aboxdata)
                            if not label:
                                param = (0, aboxdata.split("_")[0],
                                         aboxdata.split("_")[-1], 3)
                                mymysql.myupdate(self._Conn, param)
                                logger.info(
                                    'failure for update training mission %s. so from status 3 to 0.'
                                    % aboxdata.split("_")[0])
            time.sleep(10)

    def findOneIdleABox(self):
        """Return the name of a random idle sibling box other than this one.

        Returns:
            The box name, or ``''`` when no other box is idle.
        """
        oneidleabox = ''
        children = self._ZK.get_children("/%s/%s" % (self._Type, self._VName))
        logger.debug(
            "find one idle A box There are %s children with names %s" %
            (len(children), children))
        random.shuffle(children)
        for child in children:
            child_path = "/%s/%s/%s" % (self._Type, self._VName, child)
            data, stat = self._ZK.get(child_path)
            # NOTE(security): eval() on ZooKeeper data, see class docstring.
            if eval(data.decode("utf-8"))['Target'] == 'Null':
                if operator.ne(child, self._Name):
                    oneidleabox = child
                    logger.debug('one idle A box is %s.' % oneidleabox)
                    break
        return oneidleabox

    def assignOneTrainTasktoABox(self, oneabox, aboxdata):
        """Write ``aboxdata`` as the target of box ``oneabox`` if it is idle.

        Returns:
            True when the target was written, False when the box is busy.
        """
        oneabox_path = "/%s/%s/%s" % (self._Type, self._VName, oneabox)
        oneabox_address = "{\"Target\":\"%s\"}" % aboxdata
        oneabox_address = oneabox_address.encode('utf-8')
        data, stat = self._ZK.get(oneabox_path)
        # NOTE(security): eval() on ZooKeeper data, see class docstring.
        if eval(data.decode("utf-8"))['Target'] == 'Null':
            self._ZK.set(oneabox_path, oneabox_address)
            logger.info('assign kb %s to A box %s' % (aboxdata, oneabox))
            return True
        else:
            logger.error('Error:A box %s is busy and can not be assigned.' %
                         oneabox)
            return False

    def InitialMinANode(self):
        """Delete the master node if it exists."""
        if self._ZK.exists(self._Master_path):
            self._ZK.delete(self._Master_path, recursive=True)
            logger.info('ZK node %s is deleted.' % self._Master_path)

    def InitialABOXNode(self):
        """(Re)create this box's node with an idle ``Target`` of ``Null``."""
        address = "{\"Target\":\"Null\"}"
        address = address.encode('utf-8')
        if self._ZK.exists(self._Path):
            self._ZK.delete(self._Path, recursive=True)
        self._ZK.create(self._Path,
                        address,
                        None,
                        ephemeral=False,
                        sequence=False,
                        makepath=True)
        logger.info('create a A box node: %s, data: %s' %
                    (self._Path, address.decode("utf-8")))

    def InitialBBOXNode(self):
        """(Re)create this box's B-type node and the VM-level ``Bk`` node.

        The box node carries ``Target``, ``Add`` (``ip:port/id``), ``status``
        and ``update_time``; the ``/<vm name>/<CBOX['Bk']>`` node is
        recreated without data.
        """
        address = "{\"Target\":\"Null\",\"Add\":\"%s:%s/%s\",\"status\":\"0\",\"update_time\":\"%f\"}" % (
            self._IP, self._Port, self._ID, time.time())
        address = address.encode('utf-8')
        if self._ZK.exists(self._Path):
            self._ZK.delete(self._Path, recursive=True)
        self._ZK.create(self._Path,
                        address,
                        None,
                        ephemeral=False,
                        sequence=False,
                        makepath=True)
        logger.info('create a B box node: %s, data: %s' %
                    (self._Path, address.decode("utf-8")))
        vmknode = "/%s/%s" % (HOST['name'], CBOX['Bk'])
        if self._ZK.exists(vmknode):
            self._ZK.delete(vmknode, recursive=True)
        self._ZK.create(vmknode,
                        None,
                        None,
                        ephemeral=False,
                        sequence=False,
                        makepath=True)
        logger.info('create a VM/k node: %s, not data. ' % vmknode)

    def startMonitorMySQL(self):
        """Open a MySQL connection and list the pending training tasks.

        Returns:
            List of ``"<row[0]>_<row[1]>"`` strings for rows whose second
            column is all digits.
        """
        # NOTE(review): any connection already held in self._Conn is
        # overwritten without being closed; the polling loops call this
        # every 10 seconds - confirm whether that leaks connections.
        self._Conn = mymysql.myconnect(KBDATABASES)
        train_tasks = []
        param = (0, 0)
        selectresult = mymysql.myselect(self._Conn, param)
        for row in selectresult:
            if str(row[1]).isdigit():
                train_tasks.append("%s_%s" % (row[0], row[1]))
        logger.info('current train tasks is %s from mysql.' % train_tasks)
        return train_tasks

    def updateselfZKBBox(self, status):
        """Rewrite this box's node with a new ``status`` and fresh timestamp.

        ``Target`` and ``Add`` are carried over from the current node data.
        """
        oneabox_path = self._Path
        data, _ = self._ZK.get(oneabox_path)
        # NOTE(security): eval() on ZooKeeper data, see class docstring.
        current = eval(data.decode("utf-8"))
        oneabox_address = "{\"Target\":\"%s\",\"Add\":\"%s\",\"status\":\"%s\",\"update_time\":\"%f\"}" % (
            current['Target'], current['Add'], str(status), time.time())
        oneabox_address = oneabox_address.encode('utf-8')
        self._ZK.set(oneabox_path, oneabox_address)
        logger.info('success update B Box node %s with data %s.' %
                    (oneabox_path, oneabox_address))

    def updateselfZKBBoxTarget(self, target, status):
        """Rewrite this box's node with a new ``target`` and ``status``.

        ``Add`` is carried over from the current node data.
        """
        # updating 'oneabox' B tpye node data in ZK Server
        oneabox_path = self._Path
        data, _ = self._ZK.get(oneabox_path)
        # NOTE(security): eval() on ZooKeeper data, see class docstring.
        oneabox_address = "{\"Target\":\"%s\",\"Add\":\"%s\",\"status\":\"%s\",\"update_time\":\"%f\"}" % (
            target, eval(
                data.decode("utf-8"))['Add'], str(status), time.time())
        oneabox_address = oneabox_address.encode('utf-8')
        self._ZK.set(oneabox_path, oneabox_address)
        logger.info('success update B')

    def stop(self):
        """Clear the running flags and stop the ZooKeeper client."""
        self._MonitorRunning = False
        self._MinARunning = False
        self.stopZK()

    def initcbot(self, kbname, onlyread=False):
        """Create the ``ChatBot`` for knowledge base ``kbname``.

        Construction errors are logged and swallowed, in which case
        ``self._Chatbot`` keeps its previous value.
        """
        try:
            self._Chatbot = ChatBot(
                self._Name,
                storage_adapter=CHATTERBOT['storage_adapter'],
                filters=['chatterbot.filters.RepetitiveResponseFilter'],
                database_uri=KGDATABASES['database_uri'],
                database='ai_%s' % kbname,
                read_only=onlyread,
            )
            # logger.info(self._Chatbot)
        except Exception as msg:
            logger.info('Failure to initialize Chatterbot.', exc_info=True)
            logger.error(msg)

    def preprocess(self, sentence, companyid=None):
        """Return ``sentence``, synonym-processed when both flags are on.

        ``fenci.symp_sentence`` is applied only if ``ISFENCI`` and ``ISSYMS``
        are both truthy; otherwise the sentence is returned unchanged.
        """
        if ISFENCI:
            if ISSYMS:
                return fenci.symp_sentence(sentence, companyid)
            else:
                return sentence
        else:
            return sentence

    def trainkb(self, kbname):
        """Train the chatbot on every question/answer row of ``kbname``.

        Returns:
            True when all selected rows were fed to the trainer.
        """
        self.initcbot(kbname)
        try:
            logger.info("start set trainer")
            self._Chatbot.set_trainer(ListTrainer)
        except Exception as msg:
            logger.error(msg)
        logger.info("start set trainer")
        a = 0
        # NOTE(review): the parentheses do not make a tuple; this is a plain
        # string. Confirm what the mymysql helpers expect.
        param = (kbname.split("_")[0])
        selectresult = mymysql.myselectqas(self._Conn, param)
        company_id = mymysql.myselectcpid(self._Conn, param)
        logger.debug('start training the knowdata: %s and the companyid: %s.' %
                     (kbname, company_id[0]))
        b = len(selectresult)
        for row in selectresult:
            answer = "%s@%s" % (row[2], row[0])
            question = self.preprocess(row[1], company_id[0])
            self._Chatbot.train([question, answer])
            logger.debug('Train: %d, %s --> %s.' % (a, question, answer))
            a = a + 1

        if a >= b:
            logger.info('success training.')
            return True
        else:
            logger.info('failure training.')
            return False

    def deleteZKnodedata(self):
        """Reset this box's node data to the idle ``Target`` of ``Null``."""
        #oneabox_path = "/%s/%s/%s" % (self._Type, self._VName, self._Name)
        oneabox_path = self._Path
        oneabox_address = "{\"Target\":\"Null\"}"
        oneabox_address = oneabox_address.encode('utf-8')
        self._ZK.set(oneabox_path, oneabox_address)
        logger.info('A box %s turns busy into idle.' % self._Path)

    def startBZKmonitor(self):
        """Free sibling boxes whose target is older than ``TIMERHOURS``.

        For each box with a non-``Null`` target whose ``update_time`` plus
        ``TIMERHOURS`` lies in the past, the node is reset to idle with
        status 0 and the matching ``/<vm>/<Bk>/<kb>/<box>`` node, if
        present, is deleted.
        """
        children = self._ZK.get_children("/%s/%s" % (self._Type, self._VName))
        #list(map(lambda child: self._ZK.get(child_path), children))
        for child in children:
            child_path = "/%s/%s/%s" % (self._Type, self._VName, child)
            data, stat = self._ZK.get(child_path)
            # NOTE(security): eval() on ZooKeeper data, see class docstring.
            info = eval(data.decode("utf-8"))
            # if info['status'] == str(0):
            kbid = info['Target']
            if operator.ne(kbid, 'Null'):
                if time.time() > float(info['update_time']) + TIMERHOURS:
                    onebbox_address = "{\"Target\":\"Null\",\"Add\":\"%s\",\"status\":\"0\",\"update_time\":\"%f\"}" % (
                        info['Add'], time.time())
                    onebbox_address = onebbox_address.encode('utf-8')
                    self._ZK.set(child_path, onebbox_address)
                    logger.info(
                        'set B Box node %s is null and status 0 because of timeout a half hour.'
                        % child_path)
                    tmp_node = "/%s/%s/%s/%s" % (self._VName, CBOX['Bk'], kbid,
                                                 child)
                    if self._ZK.exists(tmp_node):
                        transaction = self._ZK.transaction()
                        transaction.delete(tmp_node)
                        transaction.commit()
                        logger.info('delete a VM/k/kb/Box node %s.' % tmp_node)
Beispiel #27
0
    @zk.ChildrenWatch("/xy/test")
    def watch_children(children):
        """Children watch on /xy/test; currently a no-op (debug prints disabled)."""
        #print("watch_children of /xy/test, Children are now: %s" % str(children))
        #print("watch_children of /xy/test, Children count: %d" % len(children))
        pass

    # The function above is called immediately, and again on every later change.

    @zk.DataWatch("/xy/test")
    def watch_node(data, stat):
        """Data watch on /xy/test; currently a no-op (debug print disabled)."""
        #print("watch_node, Version: %s, data: %s" % (stat.version, data.decode("utf-8")))
        pass

    #trans, great!!!
    transaction = zk.transaction()
    transaction.check('/xy/test/node2', version=3)
    transaction.create('/xy/test/node2', b"a value")
    result = transaction.commit()
    print("transaction result %s" % str(result))

    print("----------------------------")

    #     for i in range(1,100):
    #         try:
    #             result = zk.create("/xy/test/node", b"a value", acl=None, sequence=True, ephemeral=True)
    #         except Exception, e:
    #             print('=========== exception when create node, %s' % e)
    #         else:
    #             #print('=========== create /xy/test/node reuslt=%s' % result )
    #             pass
Beispiel #28
0
class Zookeeper:
    def __init__(self, hosts, max_merge_seq):
        print('create a zookeeper object')
        self.zk = ""
        self.IsConn = False
        self.Hosts = hosts
        self.MAX_MERGE_FILE_SEQUENCE = max_merge_seq
        self.filename = ''
        self.pattern = ''
        self.process_path = ''

    def connect(self):
        """
        Build a KazooClient for ``self.Hosts`` and start it.

        If starting the client raises, the error is printed and the process
        exits through ``sys.exit()``.

        :return: the started client, which is also stored on ``self.zk``
        """
        print('try connect to zookeeper')
        client = KazooClient(self.Hosts)
        self.zk = client
        try:
            client.start()
        except Exception as err:
            print("connect zookeeper failed, err:%s" % err)
            sys.exit()
        # Only reached when start() succeeded.
        self.IsConn = True
        print('connect zookeeper success')
        return self.zk

    def get_node(self, node_path):
        """
        获取空闲的process_id
        :return: process_id
        """
        self.connect()
        self.process_path = node_path
        node_list = []
        if not (self.zk.exists(node_path)):
            logging.error('zookeeper process node path: %s not exist' %
                          node_path)
            sys.exit()
        childs = self.zk.get_children(node_path)
        # len = 0
        p1 = re.compile(r"^process")
        for c in childs:
            if re.findall(p1, c):
                node_list.append(c)
        node_list = sorted(node_list)
        if len(node_list) <= 0:
            print("no process id in zookeeper process path")
            sys.exit()
        get_times = 0
        while 1:
            for node in node_list:
                lock_flag = False
                node_name = '%s/%s' % (node_path, node)
                n_child = self.zk.get_children(node_name)
                if len(n_child) > 0:
                    for n in n_child:
                        if n == 'lock':
                            lock_flag = True
                if lock_flag:
                    continue
                lock_node = "%s/%s" % (node_name, 'lock')
                self.zk.create(lock_node, ephemeral=True)
                # process_id = ''.join(node.split('_')[1:])
                print('get process_id :%s from zookeeper ' % node)
                return node
            get_times += 1
            print("no free process id in zookeeper")
            if get_times >= 3:
                print(
                    "get process id faild three times, please check zookeeper process id, exit"
                )
                sys.exit()

    def lock(self, lock):
        """
        lock the free node
        :param lock:
        :return:
        """
        self.zk.create(lock, ephemeral=True)

    def check_exists(self, node_path):
        return self.zk.exists(node_path)

    def get_config(self, config_path, config_node):
        """
        generate config files based on node's information
        :param config_path:
        :param config_node:
        :return:
        """
        data, stat = self.zk.get(config_node)
        with open(config_path + "config.ini", 'w') as f:
            f.writelines(data.decode())

    def get_node_value(self, zk_node):
        """
        获取zookeeper的节点信息
        :param zk_node:
        :return: data:node的value
                 stat:node的状态信息
        """
        data, stat = self.zk.get(zk_node)
        return data, stat

    def set_node_value(self, zk_node, data):
        """
        设置zookeeper节点的value
        :param zk_node:
        :param data:
        :return:
        """
        return self.zk.set(zk_node, value=data)

    def delete_node(self, zk_node):
        """
        删除某一节点
        :param zk_node:
        :return:
        """
        self.zk.delete(zk_node)

    def create_node(self, node, flag=False):
        """
        创建zookeeper节点
        :param node:
        :param flag:
        :return:
        """
        try:
            self.zk.create(node, ephemeral=flag)
        except Exception as e:
            logging.info("create zookeeper node:%s failed, err:%s" % (node, e))
            print(node, e)
            return False
        return True

    def cp(self, src, dest):
        """
        copy the local file to zookeeper
        :param src:local file
        :param dest:zookeeper node
        :return:
        """
        if not os.path.isfile(src):
            print("%s: `%s': Local file does not exist" % ('cp', src))
            sys.exit()

        file_size = os.path.getsize(src)
        if file_size > 1048576:
            print("%s: `%s': Local file maximum limit of 1M" % ('cp', src))
            sys.exit()

        self.connect()
        if self.zk.exists(dest):
            print("%s: `%s': Zookeeper exists" % ('cp', dest))
            sys.exit()

        with open(src, 'rb') as file:
            data = file.read()

        self.zk.create(dest)
        self.zk.set(dest, value=data)

    def zk_get_merge_fn(self, process_path, work_node, cur_seq, filename_pool):
        """
        Claim the next sequence entry under filename_pool and record redo info.

        Each child of filename_pool is named "<file_date>.<zk_seq>.<prov>".
        For every child, in sorted order, the method builds the successor name
        (sequence + 1, rolling over to 0 and the next day once the sequence
        exceeds MAX_MERGE_FILE_SEQUENCE), writes the redo node and then swaps
        the old child for the new one in a single transaction.

        :param process_path: parent path of the worker nodes
        :param work_node: name of this worker's node below process_path
        :param cur_seq: current sequence; compared as an integer against the
                        digits of file_date + zk_seq
        :param filename_pool: path of the filename pool node
        :return: zk_seq:
                       0: it is not yet time to merge
                       1: no filename_pool entry could be claimed
                       next_child: name of the filename_pool node that was claimed
        """
        if not self.zk.exists(filename_pool):
            logging.error('no filename_pool in zookeeper')
            sys.exit()
        childs = self.zk.get_children(filename_pool)
        if not childs:
            logging.error('the zookeeper filename_pool is empty')
            sys.exit()
        # zk_fn_seq = childs[0]
        childs = sorted(childs)
        # Accumulates across loop iterations, so a later attempt also rewrites
        # the entries of the attempts that failed before it.
        redo_info = []
        for child in childs:
            file_date, zk_seq, prov = child.split('.')
            # Date and sequence concatenated, with letters and dots stripped,
            # so the value can be compared numerically with cur_seq.
            zk_fs = ("%s%s" % (file_date, zk_seq))
            zk_fs = re.sub("[A-Za-z.]", "", zk_fs)
            if int(zk_fs) > int(cur_seq):
                logging.info('zk_seq:%s > cur_seq:%s, wait...' %
                             (zk_fs, cur_seq))
                return 0
            zk_seq = int(zk_seq) + 1
            if zk_seq > self.MAX_MERGE_FILE_SEQUENCE:
                # Sequence exhausted: restart at 0 on the following day.
                zk_seq = 0
                file_date = datetime.datetime.strptime(file_date, '%Y%m%d')
                next_time = file_date + datetime.timedelta(days=1)
                file_date = ('%s%02d%02d' %
                             (next_time.year, next_time.month, next_time.day))
            zk_seq = "%03d" % zk_seq
            next_child = '%s.%s.%s' % (file_date, zk_seq, prov)
            # Build one transaction that deletes the old sequence entry and
            # creates the new one, so the swap is atomic.
            transaction_request = self.zk.transaction()
            transaction_request.delete("%s/%s" % (filename_pool, child))
            transaction_request.create("%s/%s" % (filename_pool, next_child))
            redo_seq = ",".join([file_date, zk_seq, prov])
            redo_info.append("filenamepool:" + redo_seq)
            redo_node = process_path + "/" + work_node + "/" + "redo"
            # The redo node is written before the commit. create_node()
            # returns False rather than raising when creation fails.
            self.create_node(redo_node)
            self.set_node_value(redo_node, ";".join(redo_info).encode("utf-8"))
            results = transaction_request.commit()
            # Success means the delete reported True and the create returned
            # the new node's path; otherwise try the next child.
            if results[0] is True and results[1] == (
                    "%s/%s" % (filename_pool, next_child)):
                return next_child
            else:
                continue
        return 1
Beispiel #29
0
class ZKHandler(object):
    def __init__(self, config, logger=None):
        """
        Set up a ZKHandler from a config mapping.

        Reads ``config["coordinators"]`` and builds a KazooClient for those
        hosts without starting it; connect() does that later. A fresh
        ZKSchema instance is created as well.

        ``logger`` is optional; when it is None, log() prints instead.
        """
        hosts = config["coordinators"]

        self.encoding = "utf8"
        self.coordinators = hosts
        self.logger = logger
        self.zk_conn = KazooClient(hosts=hosts)
        self._schema = ZKSchema()

    #
    # Class meta-functions
    #
    def coordinators(self):
        return str(self.coordinators)

    def log(self, message, state=""):
        if self.logger is not None:
            self.logger.out(message, state)
        else:
            print(message)

    #
    # Properties
    #
    @property
    def schema(self):
        return self._schema

    #
    # State/connection management
    #
    def listener(self, state):
        """
        Log Kazoo connection state changes.

        A CONNECTED state is logged as a resumed connection; every other
        state is logged as a lost connection together with the state value.
        Nothing else is done here.
        """
        if state == KazooState.CONNECTED:
            self.log("Connection to Zookeeper resumed", state="o")
            return

        message = "Connection to Zookeeper lost with state {}".format(state)
        self.log(message, state="w")

    def connect(self, persistent=False):
        """
        Start the zk_conn object and connect to the cluster
        """
        try:
            self.zk_conn.start()
            if persistent:
                self.log("Connection to Zookeeper started", state="o")
                self.zk_conn.add_listener(self.listener)
        except Exception as e:
            raise ZKConnectionException(self, e)

    def disconnect(self, persistent=False):
        """
        Stop and close the zk_conn object and disconnect from the cluster

        The class instance may be reused later (avoids persistent connections)
        """
        self.zk_conn.stop()
        self.zk_conn.close()
        if persistent:
            self.log("Connection to Zookeeper terminated", state="o")

    #
    # Schema helper actions
    #
    def get_schema_path(self, key):
        """
        Get the Zookeeper path for {key} from the current schema based on its format.

        If {key} is a tuple of length 2, it's treated as a path plus an item instance of that path (e.g. a node, a VM, etc.).

        If {key} is a tuple of length 4, it is treated as a path plus an item instance, as well as another item instance of the subpath.

        If {key} is just a string, it's treated as a lone path (mostly used for the 'base' schema group.

        Otherwise, returns None since this is not a valid key.

        This function also handles the special case where a string that looks like an existing path (i.e. starts with '/') is passed;
        in that case it will silently return the same path back. This was mostly a migration functionality and is deprecated.
        """
        if isinstance(key, tuple):
            # This is a key tuple with both an ipath and an item
            if len(key) == 2:
                # 2-length normal tuple
                ipath, item = key
            elif len(key) == 4:
                # 4-length sub-level tuple
                ipath, item, sub_ipath, sub_item = key
                return self.schema.path(ipath, item=item) + self.schema.path(
                    sub_ipath, item=sub_item)
            else:
                # This is an invalid key
                return None
        elif isinstance(key, str):
            # This is a key string with just an ipath
            ipath = key
            item = None

            # This is a raw key path, used by backup/restore functionality
            if re.match(r"^/", ipath):
                return ipath
        else:
            # This is an invalid key
            return None

        return self.schema.path(ipath, item=item)

    #
    # Key Actions
    #
    def exists(self, key):
        """
        Check if a key exists
        """
        path = self.get_schema_path(key)
        if path is None:
            # This path is invalid, this is likely due to missing schema entries, so return False
            return False

        stat = self.zk_conn.exists(path)
        if stat:
            return True
        else:
            return False

    def read(self, key):
        """
        Read data from a key
        """
        try:
            path = self.get_schema_path(key)
            if path is None:
                # This path is invalid; this is likely due to missing schema entries, so return None
                return None

            return self.zk_conn.get(path)[0].decode(self.encoding)
        except NoNodeError:
            return None

    def write(self, kvpairs):
        """
        Create or update one or more keys' data
        """
        if type(kvpairs) is not list:
            self.log("ZKHandler error: Key-value sequence is not a list",
                     state="e")
            return False

        transaction = self.zk_conn.transaction()

        for kvpair in kvpairs:
            if type(kvpair) is not tuple:
                self.log(
                    "ZKHandler error: Key-value pair '{}' is not a tuple".
                    format(kvpair),
                    state="e",
                )
                return False

            key = kvpair[0]
            value = kvpair[1]

            path = self.get_schema_path(key)
            if path is None:
                # This path is invalid; this is likely due to missing schema entries, so continue
                continue

            if not self.exists(key):
                # Creating a new key
                transaction.create(path, str(value).encode(self.encoding))

            else:
                # Updating an existing key
                data = self.zk_conn.get(path)
                version = data[1].version

                # Validate the expected version after the execution
                new_version = version + 1

                # Update the data
                transaction.set_data(path, str(value).encode(self.encoding))

                # Check the data
                try:
                    transaction.check(path, new_version)
                except TypeError:
                    self.log(
                        "ZKHandler error: Key '{}' does not match expected version"
                        .format(path),
                        state="e",
                    )
                    return False

        try:
            transaction.commit()
            return True
        except Exception as e:
            self.log(
                "ZKHandler error: Failed to commit transaction: {}".format(e),
                state="e")
            return False

    def delete(self, keys, recursive=True):
        """
        Delete a key or list of keys (defaults to recursive)
        """
        if type(keys) is not list:
            keys = [keys]

        for key in keys:
            if self.exists(key):
                try:
                    path = self.get_schema_path(key)
                    self.zk_conn.delete(path, recursive=recursive)
                except Exception as e:
                    self.log(
                        "ZKHandler error: Failed to delete key {}: {}".format(
                            path, e),
                        state="e",
                    )
                    return False

        return True

    def children(self, key):
        """
        Lists all children of a key
        """
        try:
            path = self.get_schema_path(key)
            if path is None:
                # This path is invalid; this is likely due to missing schema entries, so return None
                return None

            return self.zk_conn.get_children(path)
        except NoNodeError:
            return None

    def rename(self, kkpairs):
        """
        Rename one or more keys to a new value
        """
        if type(kkpairs) is not list:
            self.log("ZKHandler error: Key-key sequence is not a list",
                     state="e")
            return False

        transaction = self.zk_conn.transaction()

        def rename_element(transaction, source_path, destination_path):
            data = self.zk_conn.get(source_path)[0]
            transaction.create(destination_path, data)

            if self.children(source_path):
                for child_path in self.children(source_path):
                    child_source_path = "{}/{}".format(source_path, child_path)
                    child_destination_path = "{}/{}".format(
                        destination_path, child_path)
                    rename_element(transaction, child_source_path,
                                   child_destination_path)

            transaction.delete(source_path)

        for kkpair in kkpairs:
            if type(kkpair) is not tuple:
                self.log(
                    "ZKHandler error: Key-key pair '{}' is not a tuple".format(
                        kkpair),
                    state="e",
                )
                return False

            source_key = kkpair[0]
            source_path = self.get_schema_path(source_key)
            if source_path is None:
                # This path is invalid; this is likely due to missing schema entries, so continue
                continue

            destination_key = kkpair[1]
            destination_path = self.get_schema_path(destination_key)
            if destination_path is None:
                # This path is invalid; this is likely due to missing schema entries, so continue
                continue

            if not self.exists(source_key):
                self.log(
                    "ZKHander error: Source key '{}' does not exist".format(
                        source_path),
                    state="e",
                )
                return False

            if self.exists(destination_key):
                self.log(
                    "ZKHander error: Destination key '{}' already exists".
                    format(destination_path),
                    state="e",
                )
                return False

            rename_element(transaction, source_path, destination_path)

        try:
            transaction.commit()
            return True
        except Exception as e:
            self.log(
                "ZKHandler error: Failed to commit transaction: {}".format(e),
                state="e")
            return False

    #
    # Lock actions
    #
    def readlock(self, key):
        """
        Acquires a read lock on a key
        """
        count = 1
        lock = None

        path = self.get_schema_path(key)

        while True:
            try:
                lock_id = str(uuid.uuid1())
                lock = self.zk_conn.ReadLock(path, lock_id)
                break
            except NoNodeError:
                self.log(
                    "ZKHandler warning: Failed to acquire read lock on nonexistent path {}"
                    .format(path),
                    state="e",
                )
                return None
            except Exception as e:
                if count > 5:
                    self.log(
                        "ZKHandler warning: Failed to acquire read lock after 5 tries: {}"
                        .format(e),
                        state="e",
                    )
                    break
                else:
                    time.sleep(0.5)
                    count += 1
                    continue

        return lock

    def writelock(self, key):
        """
        Acquires a write lock on a key
        """
        count = 1
        lock = None

        path = self.get_schema_path(key)

        while True:
            try:
                lock_id = str(uuid.uuid1())
                lock = self.zk_conn.WriteLock(path, lock_id)
                break
            except NoNodeError:
                self.log(
                    "ZKHandler warning: Failed to acquire write lock on nonexistent path {}"
                    .format(path),
                    state="e",
                )
                return None
            except Exception as e:
                if count > 5:
                    self.log(
                        "ZKHandler warning: Failed to acquire write lock after 5 tries: {}"
                        .format(e),
                        state="e",
                    )
                    break
                else:
                    time.sleep(0.5)
                    count += 1
                    continue

        return lock

    def exclusivelock(self, key):
        """
        Acquires an exclusive lock on a key
        """
        count = 1
        lock = None

        path = self.get_schema_path(key)

        while True:
            try:
                lock_id = str(uuid.uuid1())
                lock = self.zk_conn.Lock(path, lock_id)
                break
            except NoNodeError:
                self.log(
                    "ZKHandler warning: Failed to acquire exclusive lock on nonexistent path {}"
                    .format(path),
                    state="e",
                )
                return None
            except Exception as e:
                if count > 5:
                    self.log(
                        "ZKHandler warning: Failed to acquire exclusive lock after 5 tries: {}"
                        .format(e),
                        state="e",
                    )
                    break
                else:
                    time.sleep(0.5)
                    count += 1
                    continue

        return lock