コード例 #1
0
    def update_task(self,
                    status,
                    message=None,
                    error_message=None,
                    stack_trace=''):
        """Create the cluster task, or push a new state to the existing one.

        :param status: object whose ``.value`` is sent as the task status.
        :param str message: operation text; when None, the entry of
            OP_MESSAGE for the current ``self.op`` is used.
        :param str error_message: error text forwarded to the task.
        :param str stack_trace: stack trace forwarded to the task; it is
            replaced by '' when the tenant client is not a sysadmin.
        """
        # Stack traces are only forwarded when the tenant is a sysadmin.
        trace = stack_trace if self.tenant_client.is_sysadmin() else ''

        if self.task is None:
            self.task = Task(self.sys_admin_client)

        operation = OP_MESSAGE[self.op] if message is None else message

        # Re-use the href of the last stored task resource, if there is one.
        previous = self.task_resource
        href = None if previous is None else previous.get('href')

        tenant = self.tenant_info
        self.task_resource = self.task.update(
            status=status.value,
            namespace='vcloud.cse',
            operation=operation,
            operation_name=self.op,
            details='',
            progress=None,
            owner_href=f"urn:cse:cluster:{self.cluster_id}",
            owner_name=self.cluster_name,
            owner_type='application/vcloud.cse.cluster+xml',
            user_href=tenant['user_id'],
            user_name=tenant['user_name'],
            org_href=tenant['org_href'],
            task_href=href,
            error_message=error_message,
            stack_trace=trace)
コード例 #2
0
 def test_0001_list_task(self):
     """Check that listing tasks filtered by ERROR status finds at least one."""
     lister = Task(self.client)
     wanted_statuses = [TaskStatus.ERROR.value]
     found = lister.list_tasks(filter_status_list=wanted_statuses)
     # Drain the returned iterable to count its records.
     n = sum(1 for _record in found)
     self.logger.debug('found %s tasks' % n)
     assert n > 0
コード例 #3
0
def reload_templates(request_data, op_ctx):
    """Start a reload of the CSE templates on behalf of a system admin.

    A vCD task in the RUNNING state is created for the caller and its href
    is handed to ``_reload_templates_async``.

    :param request_data: request payload; not read by this function.
    :param op_ctx: operation context supplying the user context; its
        ``is_async`` flag is set to True before the reload is started.

    :return: dictionary containing 'task_href'.

    :raises UnauthorizedRequestError: if the caller is not a system
        administrator.
    """
    user_context = op_ctx.get_user_context(api_version=None)
    user_client = user_context.client

    # is_sysadmin is a method on the client and has to be called: the bare
    # bound method is always truthy, so without the call this guard could
    # never reject a non-admin caller.
    if not user_client.is_sysadmin():
        raise e.UnauthorizedRequestError(
            error_message=
            'Unauthorized to reload CSE native and TKG templates.'  # noqa: E501
        )

    org = vcd_utils.get_org(user_client, user_context.org_name)
    user_href = org.get_user(user_context.name).get('href')
    task = Task(user_client)
    # The task is owned by the caller's org and attributed to the caller.
    task_resource = task.update(
        status=TaskStatus.RUNNING.value,
        namespace='vcloud.cse',
        operation="Reloading native templates.",
        operation_name='template operation',
        details='',
        progress=None,
        owner_href=user_context.org_href,
        owner_name=user_context.org_name,
        owner_type='application/vnd.vmware.vcloud.org+xml',
        user_href=user_href,
        user_name=user_context.name,
        org_href=user_context.org_href)
    task_href = task_resource.get('href')

    op_ctx.is_async = True
    _reload_templates_async(op_ctx, task_href)

    return {"task_href": task_href}
コード例 #4
0
    def remove_vdc_compute_policy_from_vdc(
            self,
            request_context: ctx.RequestContext,  # noqa: E501
            ovdc_id,
            compute_policy_href,
            remove_compute_policy_from_vms=False):  # noqa: E501
        """Remove a compute policy from the specified vdc.

        A RUNNING task is created for the removal and the work itself is
        delegated to ``_remove_compute_policy_from_vdc_async``.

        Note: The VDC compute policy need not be created by CSE.

        :param request_context: request context of the remove compute policy
            request; its ``is_async`` flag is set to True by this method
        :param str ovdc_id: id of the vdc the policy is removed from
        :param compute_policy_href: policy href to remove
        :param bool remove_compute_policy_from_vms: If True, will set affected
            VMs' compute policy to 'System Default'

        :return: dictionary containing 'task_href'.
        """
        # TODO find an efficient way without passing in request context
        sysadmin_client = self._sysadmin_client
        target_vdc = vcd_utils.get_vdc(sysadmin_client, vdc_id=ovdc_id)

        admin_org = vcd_utils.get_org(sysadmin_client)
        admin_org.reload()
        requester = self._session.get('user')
        requester_href = admin_org.get_user(requester).get('href')

        tracker = Task(sysadmin_client)
        operation_text = (
            f"Removing compute policy (href: {compute_policy_href})"
            f" from org VDC (vdc id: {ovdc_id})")
        running_task = tracker.update(
            status=vcd_client.TaskStatus.RUNNING.value,
            namespace='vcloud.cse',
            operation=operation_text,
            operation_name='Remove org VDC compute policy',
            details='',
            progress=None,
            owner_href=target_vdc.href,
            owner_name=target_vdc.name,
            owner_type=vcd_client.EntityType.VDC.value,
            user_href=requester_href,
            user_name=requester,
            org_href=admin_org.href)

        href = running_task.get('href')
        request_context.is_async = True
        self._remove_compute_policy_from_vdc_async(
            request_context=request_context,
            task=tracker,
            task_href=href,
            user_href=requester_href,
            org_href=admin_org.href,
            ovdc_id=ovdc_id,
            compute_policy_href=compute_policy_href,
            remove_compute_policy_from_vms=remove_compute_policy_from_vms)

        return {'task_href': href}
コード例 #5
0
    def remove_vdc_compute_policy_from_vdc(
            self,  # noqa: E501
            ovdc_id,
            compute_policy_href,
            force=False):  # noqa: E501
        """Remove a compute policy from the specified vdc.

        A RUNNING task is created for the removal and the task resource is
        handed to ``_remove_compute_policy_from_vdc_async``.

        :param str ovdc_id: id of the vdc the policy is removed from
        :param compute_policy_href: policy href to remove
        :param bool force: If True, will set affected
            VMs' compute policy to 'System Default'

        :return: dictionary containing 'task_href'.
        """
        sysadmin_client = self._sysadmin_client
        target_vdc = vcd_utils.get_vdc(sysadmin_client, vdc_id=ovdc_id)

        # TODO the org fetched below will be the 'System' org; the created
        # task should instead be associated with the org that owns the vdc.
        admin_org = vcd_utils.get_org(sysadmin_client)
        admin_org.reload()
        requester = self._session.get('user')
        requester_href = admin_org.get_user(requester).get('href')

        tracker = Task(sysadmin_client)
        operation_text = (
            f"Removing compute policy (href: {compute_policy_href})"
            f" from org VDC (vdc id: {ovdc_id})")
        running_task = tracker.update(
            status=vcd_client.TaskStatus.RUNNING.value,
            namespace='vcloud.cse',
            operation=operation_text,
            operation_name='Remove org VDC compute policy',
            details='',
            progress=None,
            owner_href=target_vdc.href,
            owner_name=target_vdc.name,
            owner_type=vcd_client.EntityType.VDC.value,
            user_href=requester_href,
            user_name=requester,
            org_href=admin_org.href)

        href = running_task.get('href')
        self._remove_compute_policy_from_vdc_async(
            ovdc_id=ovdc_id,
            compute_policy_href=compute_policy_href,
            task_resource=running_task,
            force=force)

        return {'task_href': href}
コード例 #6
0
    def _update_task(self,
                     status,
                     message='',
                     error_message=None,
                     stack_trace=''):
        """Update the broker's task, creating the Task helper on first use.

        Intended for the x_async functions and for the 6 common broker
        functions that create the required task.

        NOTE (carried over from the original documentation; the login itself
        is not visible in this method): using this function logs in the sys
        admin client if it is not already logged in, and never logs it out.
        Several _update_task() calls are made in sequence until the task
        succeeds or fails, so the sys admin client should only be logged out
        once the task has reached a success or failure state. Keeping logout
        separate also lets a ``finally`` clause take care of it when an
        unknown error interrupts an operation.

        :param status: object whose ``.value`` is sent as the task status.
        :param str message: operation text shown on the task.
        :param str error_message: error text forwarded to the task.
        :param str stack_trace: stack trace forwarded to the task; replaced
            by '' when the tenant client is not a sysadmin.
        """
        # Stack traces are only forwarded when the tenant is a sysadmin.
        trace = stack_trace if self.tenant_client.is_sysadmin() else ''

        if self.task is None:
            self.task = Task(self.sys_admin_client)

        # Re-use the href of the last stored task resource, if there is one.
        current = self.task_resource
        href = current.get('href') if current is not None else None

        tenant_org = vcd_utils.get_org(self.tenant_client)
        session_user = self.client_session.get('user')
        requester_href = tenant_org.get_user(session_user).get('href')

        self.task_resource = self.task.update(
            status=status.value,
            namespace='vcloud.cse',
            operation=message,
            operation_name='cluster operation',
            details='',
            progress=None,
            owner_href=self.tenant_org_href,
            owner_name=self.tenant_org_name,
            owner_type='application/vnd.vmware.vcloud.org+xml',
            user_href=requester_href,
            user_name=self.tenant_user_name,
            org_href=self.tenant_org_href,
            task_href=href,
            error_message=error_message,
            stack_trace=trace)
コード例 #7
0
    def remove_compute_policy_from_vdc(self, ovdc_id, compute_policy_href,
                                       remove_compute_policy_from_vms=False):
        """Remove a compute policy from the specified vdc.

        A RUNNING task is created for the removal and the work itself is
        delegated to ``_remove_compute_policy_from_vdc_async``.

        :param str ovdc_id: id of the vdc the policy is removed from
        :param compute_policy_href: policy href to remove
        :param bool remove_compute_policy_from_vms: If True, will set affected
            VMs' compute policy to 'System Default'

        :return: dictionary containing 'task_href'.
        """
        client = self._vcd_client
        target_vdc = pyvcd_utils.get_vdc(client, vdc_id=ovdc_id)

        # TODO is there no better way to get the client href?
        client_org = pyvcd_utils.get_org(client)
        client_org.reload()
        requester = self._session.get('user')
        requester_href = client_org.get_user(requester).get('href')

        tracker = Task(client)
        operation_text = (
            f"Removing compute policy (href: {compute_policy_href})"
            f" from org VDC (vdc id: {ovdc_id})")
        running_task = tracker.update(
            status=TaskStatus.RUNNING.value,
            namespace='vcloud.cse',
            operation=operation_text,
            operation_name='Remove org VDC compute policy',
            details='',
            progress=None,
            owner_href=target_vdc.href,
            owner_name=target_vdc.name,
            owner_type=EntityType.VDC.value,
            user_href=requester_href,
            user_name=requester,
            org_href=client_org.href)

        href = running_task.get('href')
        self._remove_compute_policy_from_vdc_async(
            task=tracker,
            task_href=href,
            user_href=requester_href,
            org_href=client_org.href,
            ovdc_id=ovdc_id,
            compute_policy_href=compute_policy_href,
            remove_compute_policy_from_vms=remove_compute_policy_from_vms)

        return {'task_href': href}
コード例 #8
0
ファイル: task.py プロジェクト: rdbwebster/vcd-cli
def list_tasks(ctx, status):
    """Print the tasks whose status matches the given filter.

    Any exception raised along the way is reported through ``stderr``
    instead of propagating.

    :param ctx: CLI context; ``ctx.obj['client']`` supplies the client.
    :param status: value passed to ``Task.list_tasks`` as
        ``filter_status_list``.
    """
    shown_attributes = (
        'name', 'status', 'objectName', 'ownerName',
        'orgName', 'startDate', 'serviceNamespace', 'id',
    )
    try:
        lister = Task(ctx.obj['client'])
        records = lister.list_tasks(filter_status_list=status)
        # A fresh attribute list is handed to every to_dict call.
        rows = [
            to_dict(record, attributes=list(shown_attributes))
            for record in records
        ]
        stdout(rows, ctx, show_id=True)
    except Exception as e:
        stderr(e, ctx)
コード例 #9
0
ファイル: task.py プロジェクト: vmware/vca-cli
def list_tasks(ctx, status):
    """Restore the session, then print the tasks matching the status filter.

    Any exception raised along the way is reported through ``stderr``
    instead of propagating.

    :param ctx: CLI context; ``ctx.obj['client']`` supplies the client
        once ``restore_session`` has run.
    :param status: value passed to ``Task.list_tasks`` as
        ``filter_status_list``.
    """
    shown_attributes = (
        'name', 'status', 'objectName', 'ownerName', 'orgName',
        'startDate', 'serviceNamespace', 'id',
    )
    try:
        restore_session(ctx)
        lister = Task(ctx.obj['client'])
        records = lister.list_tasks(filter_status_list=status)
        # A fresh attribute list is handed to every to_dict call.
        rows = [
            to_dict(record, attributes=list(shown_attributes))
            for record in records
        ]
        stdout(rows, ctx, show_id=True)
    except Exception as e:
        stderr(e, ctx)
コード例 #10
0
 def update_task(self, status, operation, message=None, error_message=None):
     """Create or update the cluster task and keep the result in ``self.t``.

     :param status: object whose ``.value`` is sent as the task status.
     :param operation: operation name; also the OP_MESSAGE key used when
         no message is given.
     :param str message: operation text; defaults to OP_MESSAGE[operation].
     :param str error_message: error text forwarded to the task.
     """
     if not hasattr(self, 'task'):
         self.task = Task(self.client_sysadmin)
     text = OP_MESSAGE[operation] if message is None else message
     # Re-use the href of the previously stored task resource, if any.
     previous_href = self.t.get('href') if hasattr(self, 't') else None
     tenant = self.tenant_info
     positional = (
         status.value,
         'vcloud.cse',
         text,
         operation,
         '',
         None,
         'urn:cse:cluster:%s' % self.cluster_id,
         self.cluster_name,
         'application/vcloud.cse.cluster+xml',
         tenant['user_id'],
         tenant['user_name'],
     )
     self.t = self.task.update(*positional,
                               org_href=tenant['org_href'],
                               task_href=previous_href,
                               error_message=error_message)
コード例 #11
0
    def _remove_compute_policy_from_vdc_async(self,
                                              *args,
                                              ovdc_id,
                                              compute_policy_href,
                                              task_resource,
                                              force=False):
        """Remove the compute policy from the vdc and finish the given task.

        The task is moved to SUCCESS when the removal goes through. Any
        exception is logged and reported on the task as ERROR; it is not
        re-raised.

        :param args: accepted but not used.
        :param str ovdc_id: id of the vdc the policy is removed from
        :param compute_policy_href: policy href to remove
        :param task_resource: resource of the task to update; its href, User
            href and Organization href are read from it
        :param bool force: passed through to
            ``remove_compute_policy_from_vdc_sync``
        """
        vdc = vcd_utils.get_vdc(self._sysadmin_client,
                                vdc_id=ovdc_id,
                                is_admin_operation=True)
        task_href = task_resource.get('href')
        user_href = task_resource.User.get('href')
        org_href = task_resource.Organization.get('href')
        task = Task(client=self._sysadmin_client)

        def _publish(status, operation, operation_name, **extra):
            # Fields common to the SUCCESS and ERROR updates; vdc and session
            # values are read at call time, exactly when the update is sent.
            task.update(
                status=status.value,
                namespace='vcloud.cse',
                operation=operation,
                operation_name=operation_name,
                details='',
                progress=None,
                owner_href=vdc.href,
                owner_name=vdc.name,
                owner_type=vcd_client.EntityType.VDC.value,
                user_href=user_href,
                user_name=self._session.get('user'),
                task_href=task_href,
                org_href=org_href,
                **extra)

        try:
            self.remove_compute_policy_from_vdc_sync(
                vdc=vdc,
                compute_policy_href=compute_policy_href,
                task_resource=task_resource,
                force=force)

            _publish(
                vcd_client.TaskStatus.SUCCESS,
                f"Removed compute policy (href: "
                f"{compute_policy_href}) from org VDC '{vdc.name}'",  # noqa: E501
                'Updating VDC')
        except Exception as err:
            msg = f'Failed to remove compute policy: {compute_policy_href} ' \
                  f'from the OVDC: {vdc.name}'
            logger.SERVER_LOGGER.error(msg)  # noqa: E501
            _publish(vcd_client.TaskStatus.ERROR,
                     msg,
                     'Remove org VDC compute policy',
                     error_message=f"{err}",
                     stack_trace='')
コード例 #12
0
class DefaultBroker(threading.Thread):
    def __init__(self, config):
        threading.Thread.__init__(self)
        self.config = config
        self.host = config['vcd']['host']
        self.username = config['vcd']['username']
        self.password = config['vcd']['password']
        self.version = config['vcd']['api_version']
        self.verify = config['vcd']['verify']
        self.log = config['vcd']['log']

    def _connect_sysadmin(self):
        """Create ``self.client_sysadmin`` and set its System org credentials.

        When certificate verification is disabled a warning is logged and
        the urllib3 warnings are silenced.
        """
        if not self.verify:
            LOGGER.warning('InsecureRequestWarning: '
                           'Unverified HTTPS request is being made. '
                           'Adding certificate verification is strongly '
                           'advised.')
            requests.packages.urllib3.disable_warnings()
        client_options = dict(uri=self.host,
                              api_version=self.version,
                              verify_ssl_certs=self.verify,
                              log_headers=True,
                              log_bodies=True)
        # The client is stored before the credentials are applied.
        self.client_sysadmin = Client(**client_options)
        sysadmin_credentials = BasicLoginCredentials(self.username,
                                                     SYSTEM_ORG_NAME,
                                                     self.password)
        self.client_sysadmin.set_credentials(sysadmin_credentials)

    def _connect_tenant(self, headers):
        """Create ``self.client_tenant`` from the request's session token.

        The API version is the text following 'version=' in the 'Accept'
        header; the token comes from 'x-vcloud-authorization'.

        :param headers: request headers.

        :return: dict with 'user_name', 'user_id', 'org_name' and 'org_href'
            of the rehydrated session.
        """
        auth_token = headers.get('x-vcloud-authorization')
        api_version = headers.get('Accept').split('version=')[1]
        self.client_tenant = Client(uri=self.host,
                                    api_version=api_version,
                                    verify_ssl_certs=self.verify,
                                    log_headers=True,
                                    log_bodies=True)
        session = self.client_tenant.rehydrate_from_token(auth_token)
        tenant_info = {
            'user_name': session.get('user'),
            'user_id': session.get('userId'),
            'org_name': session.get('org'),
        }
        tenant_info['org_href'] = self.client_tenant._get_wk_endpoint(
            _WellKnownEndpoint.LOGGED_IN_ORG)
        return tenant_info

    def _to_message(self, e):
        if hasattr(e, 'message'):
            return {'message': e.message}
        else:
            return {'message': str(e)}

    def update_task(self, status, message=None, error_message=None):
        """Create or update the cluster task; keep it in ``task_resource``.

        :param status: object whose ``.value`` is sent as the task status.
        :param str message: operation text; defaults to the OP_MESSAGE entry
            for the current ``self.op``.
        :param str error_message: error text forwarded to the task.
        """
        if not hasattr(self, 'task'):
            self.task = Task(self.client_sysadmin)
        text = OP_MESSAGE[self.op] if message is None else message
        # Re-use the href of the previously stored task resource, if any.
        previous_href = (self.task_resource.get('href')
                         if hasattr(self, 'task_resource') else None)
        tenant = self.tenant_info
        positional = (
            status.value,
            'vcloud.cse',
            text,
            self.op,
            '',
            None,
            'urn:cse:cluster:%s' % self.cluster_id,
            self.cluster_name,
            'application/vcloud.cse.cluster+xml',
            tenant['user_id'],
            tenant['user_name'],
        )
        self.task_resource = self.task.update(
            *positional,
            org_href=tenant['org_href'],
            task_href=previous_href,
            error_message=error_message)

    def is_valid_name(self, name):
        """Validate the cluster name against the host name pattern.

        The name must be non-empty and at most MAX_HOST_NAME_LENGTH
        characters long. After one trailing dot is dropped, every
        dot-separated label must consist of 1 to 63 letters, digits or
        hyphens and must not start or end with a hyphen.

        :param str name: candidate cluster name.

        :return: True if the name is valid, False otherwise.
        """
        # An empty name has no last character to inspect and is never valid.
        if not name:
            return False
        if len(name) > MAX_HOST_NAME_LENGTH:
            return False
        if name[-1] == '.':
            name = name[:-1]
        # Raw string: '\d' inside a normal literal is an invalid escape
        # sequence. '\Z' anchors at the true end of the label, whereas '$'
        # would also accept a label followed by a newline.
        allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?<!-)\Z", re.IGNORECASE)
        return all(allowed.match(label) for label in name.split("."))

    def get_template(self, name=None):
        """Look up a template of the broker configuration by name.

        When no name is given, the request body's 'template' entry is used
        if it is present and not None, otherwise the configured
        'default_template'.

        :param str name: template name to look up.

        :return: the first configured template whose 'name' matches.

        :raises Exception: if no configured template has that name.
        """
        broker_config = self.config['broker']
        if name is None:
            requested_in_body = ('template' in self.body
                                 and self.body['template'] is not None)
            if requested_in_body:
                name = self.body['template']
            else:
                name = broker_config['default_template']
        matching = (candidate for candidate in broker_config['templates']
                    if candidate['name'] == name)
        found = next(matching, None)
        if found is None:
            raise Exception('Template %s not found' % name)
        return found

    def run(self):
        """Thread entry point: call the worker method matching ``self.op``.

        Operations other than the four listed below are ignored.
        """
        LOGGER.debug('thread started op=%s' % self.op)
        # Checked in order; the worker is only looked up once its op matches.
        workers = (
            (OP_CREATE_CLUSTER, 'create_cluster_thread'),
            (OP_DELETE_CLUSTER, 'delete_cluster_thread'),
            (OP_CREATE_NODES, 'create_nodes_thread'),
            (OP_DELETE_NODES, 'delete_nodes_thread'),
        )
        for op, worker_name in workers:
            if self.op == op:
                getattr(self, worker_name)()
                break

    @exception_handler
    def list_clusters(self, headers, body):
        """List the clusters loaded from metadata for the tenant.

        :param headers: request headers used to connect as the tenant.
        :param body: request body; not read by this method.

        :return: dict with the clusters under 'body' and OK under
            'status_code'.
        """
        self._connect_tenant(headers)
        return {
            'body': load_from_metadata(self.client_tenant),
            'status_code': OK,
        }

    @exception_handler
    def get_cluster_info(self, name, headers, body):
        """Get the info of the cluster, including its nodes.

        Every VM of the cluster vApp is added to the cluster's
        'master_nodes', 'nodes' or 'nfs_nodes' list according to the prefix
        of its name. A VM whose primary IP cannot be read is listed with an
        empty 'ipAddress'.

        :param name: (str): Name of the cluster
        :param headers: (str): Request headers
        :param body: Request body; not read by this method

        :return: (dict): Info of the cluster.
        """
        self._connect_tenant(headers)
        clusters = load_from_metadata(self.client_tenant, name=name)
        if not clusters:
            raise CseServerError('Cluster \'%s\' not found.' % name)
        cluster = clusters[0]
        vapp = VApp(self.client_tenant, href=cluster['vapp_href'])
        # Node name prefix -> cluster key that collects nodes of that type.
        buckets = (
            (TYPE_MASTER, 'master_nodes'),
            (TYPE_NODE, 'nodes'),
            (TYPE_NFS, 'nfs_nodes'),
        )
        for vm in vapp.get_all_vms():
            vm_name = vm.get('name')
            node_info = {'name': vm_name, 'ipAddress': ''}
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm_name)
            except Exception:
                LOGGER.debug('cannot get ip address for node %s' % vm_name)
            for prefix, bucket in buckets:
                if vm_name.startswith(prefix):
                    cluster.get(bucket).append(node_info)
                    break
        return {'body': cluster, 'status_code': OK}

    @exception_handler
    def get_node_info(self, cluster_name, node_name, headers):
        """Get the info of a given node in the cluster.

        For NFS nodes the exports reported by ``_get_nfs_exports`` are
        included under 'exports'.

        :param cluster_name: (str): Name of the cluster
        :param node_name: (str): Name of the node
        :param headers: (str): Request headers

        :return: (dict): Info of the node.
        """
        self._connect_tenant(headers)
        clusters = load_from_metadata(self.client_tenant, name=cluster_name)
        if not clusters:
            raise CseServerError('Cluster \'%s\' not found.' % cluster_name)
        vapp = VApp(self.client_tenant, href=clusters[0]['vapp_href'])
        # Node name prefix -> reported node type, checked in this order.
        node_types = (
            (TYPE_MASTER, 'master'),
            (TYPE_NODE, 'node'),
            (TYPE_NFS, 'nfsd'),
        )
        node_info = None
        # No early exit: every VM carrying the requested name is processed,
        # so the last one wins.
        for vm in vapp.get_all_vms():
            vm_name = vm.get('name')
            if not (node_name == vm_name):
                continue
            node_info = {
                'name': vm_name,
                'numberOfCpus': '',
                'memoryMB': '',
                'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
                'ipAddress': ''
            }
            if hasattr(vm, 'VmSpecSection'):
                spec = vm.VmSpecSection
                node_info['numberOfCpus'] = spec.NumCpus.text
                node_info['memoryMB'] = spec.MemoryResourceMb.Configured.text
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm_name)
            except Exception:
                LOGGER.debug('cannot get ip address '
                             'for node %s' % vm_name)
            for prefix, node_type in node_types:
                if vm_name.startswith(prefix):
                    node_info['node_type'] = node_type
                    if prefix is TYPE_NFS:
                        node_info['exports'] = self._get_nfs_exports(
                            node_info['ipAddress'], vapp, vm)
                    break
        if node_info is None:
            raise CseServerError('Node \'%s\' not found in cluster \'%s\'' %
                                 (node_name, cluster_name))
        return {'body': node_info, 'status_code': OK}

    def _get_nfs_exports(self, ip, vapp, node):
        """Get the exports from remote NFS server (helper method).

        Runs ``showmount -e <ip>`` on the given node through
        ``execute_script_in_nodes`` and returns the first column of every
        output line after the first one.

        :param ip: (str): IP address of the NFS server
        :param vapp: (pyvcloud.vcd.vapp.VApp): The vApp or cluster
         to which node belongs
        :param node: (`lxml.objectify.StringElement`) object
        representing the vm resource.

        :return: (List): List of exports
        """
        # TODO(right template) find a right way to retrieve
        # the template from which nfs node was created.
        template = self.config['broker']['templates'][0]
        script = '#!/usr/bin/env bash\nshowmount -e %s' % ip
        result = execute_script_in_nodes(self.config,
                                         vapp,
                                         template['admin_password'],
                                         script,
                                         nodes=[node],
                                         check_tools=False)
        lines = result[0][1].content.decode().split('\n')
        # The first line is skipped as the header. Blank lines, including the
        # empty string left by a trailing newline, are ignored instead of
        # being indexed, so a blank line no longer raises IndexError and the
        # last export is kept when the output has no trailing newline.
        fields_per_line = (line.split() for line in lines[1:])
        return [fields[0] for fields in fields_per_line if fields]

    @exception_handler
    def create_cluster(self, headers, body):
        """Validate the request and start the cluster creation thread.

        A RUNNING task is created through ``update_task`` before this broker
        thread is started as a daemon.

        :param headers: request headers used to connect as the tenant.
        :param dict body: request body; 'name', 'vdc', 'node_count' and
            'storage_profile' are read here.

        :return: dict whose 'body' holds the cluster 'name', 'cluster_id'
            and 'task_href', with ACCEPTED as 'status_code'.

        :raises CseServerError: if the cluster name is not valid.
        """
        cluster_name = body['name']
        vdc_name = body['vdc']
        node_count = body['node_count']
        LOGGER.debug('about to create cluster %s on %s with %s nodes, sp=%s',
                     cluster_name, vdc_name, node_count,
                     body['storage_profile'])

        if not self.is_valid_name(cluster_name):
            raise CseServerError(f"Invalid cluster name \'{cluster_name}\'")

        # State read later by the worker thread and by update_task.
        self.tenant_info = self._connect_tenant(headers)
        self.headers = headers
        self.body = body
        self.cluster_name = cluster_name
        self.cluster_id = str(uuid.uuid4())
        self.op = OP_CREATE_CLUSTER
        self._connect_sysadmin()
        self.update_task(TaskStatus.RUNNING,
                         message='Creating cluster %s(%s)' %
                         (cluster_name, self.cluster_id))
        self.daemon = True
        self.start()
        return {
            'body': {
                'name': self.cluster_name,
                'cluster_id': self.cluster_id,
                'task_href': self.task_resource.get('href'),
            },
            'status_code': ACCEPTED,
        }

    @rollback
    def create_cluster_thread(self):
        """Create the cluster described by ``self.body``, step by step.

        Steps, each announced through ``update_task``: create the cluster
        vApp, tag it with the CSE metadata, add the master node, initialize
        the cluster and record the master IP, then optionally add worker
        nodes and an NFS node. The task ends in SUCCESS, or in ERROR with the
        description taken from ``error_to_json``.

        Errors of the node creation / joining / initialization / operation
        types listed in the first handler are re-raised after the task is
        updated; any other exception is reported on the task and swallowed.
        NOTE(review): presumably the re-raise is what lets ``@rollback``
        react to the failure - confirm against the decorator.
        """
        network_name = self.body['network']
        try:
            # Refuse to continue when a cluster with this name is already
            # recorded in metadata.
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise ClusterAlreadyExistsError(
                    f'Cluster {self.cluster_name} already exists.')
            org_resource = self.client_tenant.get_org()
            org = Org(self.client_tenant, resource=org_resource)
            vdc_resource = org.get_vdc(self.body['vdc'])
            vdc = VDC(self.client_tenant, resource=vdc_resource)
            template = self.get_template()
            self.update_task(TaskStatus.RUNNING,
                             message='Creating cluster vApp %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            try:
                vapp_resource = vdc.create_vapp(self.cluster_name,
                                                description='cluster %s' %
                                                self.cluster_name,
                                                network=network_name,
                                                fence_mode='bridged')
            except Exception as e:
                raise ClusterOperationError('Error while creating vApp:',
                                            str(e))

            # Wait for the vApp creation task before touching the vApp.
            self.client_tenant.get_task_monitor().wait_for_status(
                vapp_resource.Tasks.Task[0])
            # Metadata written on the vApp: cluster id, CSE version and
            # template name.
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.client_tenant, href=vapp_resource.get('href'))
            for k, v in tags.items():
                task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.client_tenant.get_task_monitor().wait_for_status(task)
            self.update_task(TaskStatus.RUNNING,
                             message='Creating master node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()

            try:
                add_nodes(1, template, TYPE_MASTER, self.config,
                          self.client_tenant, org, vdc, vapp, self.body)
            except Exception as e:
                raise MasterNodeCreationError(
                    "Error while adding master node:", str(e))

            self.update_task(TaskStatus.RUNNING,
                             message='Initializing cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            init_cluster(self.config, vapp, template)
            # Record the master IP in the vApp metadata.
            master_ip = get_master_ip(self.config, vapp, template)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.client_tenant.get_task_monitor().wait_for_status(task)
            # Worker nodes are only created when a positive count was
            # requested.
            if self.body['node_count'] > 0:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating %s node(s) for %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                try:
                    add_nodes(self.body['node_count'], template, TYPE_NODE,
                              self.config, self.client_tenant, org, vdc, vapp,
                              self.body)
                except Exception as e:
                    raise WorkerNodeCreationError(
                        "Error while creating worker node:", str(e))

                self.update_task(TaskStatus.RUNNING,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                vapp.reload()
                join_cluster(self.config, vapp, template)
            # The NFS node is optional and requested through 'enable_nfs'.
            if self.body['enable_nfs']:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating NFS node for %s(%s)' %
                                 (self.cluster_name, self.cluster_id))
                try:
                    add_nodes(1, template, TYPE_NFS, self.config,
                              self.client_tenant, org, vdc, vapp, self.body)
                except Exception as e:
                    raise NFSNodeCreationError(
                        "Error while creating NFS node:", str(e))

            self.update_task(TaskStatus.SUCCESS,
                             message='Created cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except (MasterNodeCreationError, WorkerNodeCreationError,
                NFSNodeCreationError, ClusterJoiningError,
                ClusterInitializationError, ClusterOperationError) as e:
            # Listed failures: mark the task as ERROR, then propagate.
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION])
            raise e
        except Exception as e:
            # Everything else is reported on the task but not re-raised.
            # ClusterAlreadyExistsError is not in the tuple above, so unless
            # it subclasses one of those types it ends up here.
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION])

    @exception_handler
    def delete_cluster(self, headers, body):
        """Accept a cluster-deletion request and start it in the background.

        Connects as the tenant and as sysadmin, looks the cluster up by
        ``body['name']``, publishes a RUNNING task and starts this thread.

        :param headers: request headers, handed to ``_connect_tenant``.
        :param dict body: request payload; ``body['name']`` is the cluster
            name.

        :return: dict with ``body`` (cluster name and task href) and
            ``status_code`` set to ``ACCEPTED``.

        :raises CseServerError: if the lookup does not yield exactly one
            cluster.
        """
        LOGGER.debug('about to delete cluster with name: %s' % body['name'])

        # Record the request on the instance; the worker thread reads it.
        self.cluster_name = body['name']
        self.tenant_info = self._connect_tenant(headers)
        self.headers = headers
        self.body = body
        self.op = OP_DELETE_CLUSTER
        self._connect_sysadmin()

        matches = load_from_metadata(self.client_tenant,
                                     name=self.cluster_name)
        if len(matches) != 1:
            raise CseServerError('Cluster %s not found.' % self.cluster_name)
        self.cluster = matches[0]
        self.cluster_id = self.cluster['cluster_id']

        progress = 'Deleting cluster %s(%s)' % (self.cluster_name,
                                                 self.cluster_id)
        self.update_task(TaskStatus.RUNNING, message=progress)

        # The deletion itself is performed by the thread.
        self.daemon = True
        self.start()

        return {
            'body': {
                'cluster_name': self.cluster_name,
                'task_href': self.task_resource.get('href'),
            },
            'status_code': ACCEPTED,
        }

    def delete_cluster_thread(self):
        """Delete the cluster vApp and report the outcome through the task.

        Any exception is logged with its traceback and turned into an ERROR
        task update; nothing is re-raised.
        """
        LOGGER.debug('about to delete cluster with name: %s',
                     self.cluster_name)
        try:
            cluster_vdc = VDC(self.client_tenant,
                              href=self.cluster['vdc_href'])
            removal = cluster_vdc.delete_vapp(self.cluster['name'],
                                              force=True)
            monitor = self.client_tenant.get_task_monitor()
            monitor.wait_for_status(removal)
            outcome = 'Deleted cluster %s(%s)' % (self.cluster_name,
                                                  self.cluster_id)
            self.update_task(TaskStatus.SUCCESS, message=outcome)
        except Exception as err:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(err))

    @exception_handler
    def get_cluster_config(self, cluster_name, headers):
        """Fetch the configuration of the named cluster.

        :param str cluster_name: name of the cluster to look up.
        :param headers: request headers, handed to ``_connect_tenant``.

        :return: dict with ``body`` (the value returned by the module-level
            ``get_cluster_config`` helper) and ``status_code`` set to ``OK``.

        :raises CseServerError: if the lookup does not yield exactly one
            cluster.
        """
        self._connect_tenant(headers)
        matches = load_from_metadata(self.client_tenant, name=cluster_name)
        if len(matches) != 1:
            raise CseServerError('Cluster \'%s\' not found' % cluster_name)

        found = matches[0]
        cluster_vapp = VApp(self.client_tenant, href=found['vapp_href'])
        cluster_template = self.get_template(name=found['template'])
        # Inside the method body this name resolves to the module-level
        # helper, not to this method.
        config_body = get_cluster_config(self.config, cluster_vapp,
                                         cluster_template['admin_password'])
        return {'body': config_body, 'status_code': OK}

    @exception_handler
    def create_nodes(self, headers, body):
        """Accept a request to add nodes to a cluster and start it.

        Validates the node count, looks the cluster up by ``body['name']``,
        publishes a RUNNING task and starts this thread.

        :param headers: request headers, handed to ``_connect_tenant``.
        :param dict body: request payload; reads ``name``, ``node_count``,
            ``vdc`` and ``storage_profile``.

        :return: dict with ``body`` (cluster name and task href) and
            ``status_code`` set to ``ACCEPTED``.

        :raises CseServerError: if the node count is below 1 or the lookup
            does not yield exactly one cluster.
        """
        self.cluster_name = body['name']
        node_count = body['node_count']
        LOGGER.debug('about to add %s nodes to cluster %s on VDC %s, sp=%s',
                     node_count, self.cluster_name, body['vdc'],
                     body['storage_profile'])
        if node_count < 1:
            raise CseServerError('Invalid node count: %s.' % node_count)

        self.tenant_info = self._connect_tenant(headers)
        matches = load_from_metadata(self.client_tenant,
                                     name=self.cluster_name)
        if len(matches) != 1:
            raise CseServerError('Cluster \'%s\' not found.' %
                                 self.cluster_name)

        # Record the request on the instance; the worker thread reads it.
        self.cluster = matches[0]
        self.headers = headers
        self.body = body
        self.op = OP_CREATE_NODES
        self._connect_sysadmin()
        self.cluster_id = self.cluster['cluster_id']

        progress = 'Adding %s node(s) to cluster %s(%s)' % (
            node_count, self.cluster_name, self.cluster_id)
        self.update_task(TaskStatus.RUNNING, message=progress)

        self.daemon = True
        self.start()

        return {
            'body': {
                'cluster_name': self.cluster_name,
                'task_href': self.task_resource.get('href'),
            },
            'status_code': ACCEPTED,
        }

    @rollback
    def create_nodes_thread(self):
        """Create the requested nodes and, for worker nodes, join them.

        NFS nodes are reported as done once created; nodes of type
        ``TYPE_NODE`` are additionally joined to the cluster. Every failure
        is logged and reported as an ERROR task update; only
        ``NodeCreationError`` is re-raised afterwards.
        """
        LOGGER.debug('about to add nodes to cluster with name: %s',
                     self.cluster_name)

        def report(status, text):
            # ``text`` takes node count, cluster name and cluster id.
            self.update_task(
                status,
                message=text % (self.body['node_count'], self.cluster_name,
                                self.cluster_id))

        try:
            tenant_org = self.client_tenant.get_org()
            org = Org(self.client_tenant, resource=tenant_org)
            vdc = VDC(self.client_tenant, href=self.cluster['vdc_href'])
            vapp = VApp(self.client_tenant, href=self.cluster['vapp_href'])
            template = self.get_template()

            report(TaskStatus.RUNNING, 'Creating %s node(s) for %s(%s)')
            created = add_nodes(self.body['node_count'], template,
                                self.body['node_type'], self.config,
                                self.client_tenant, org, vdc, vapp,
                                self.body)

            if self.body['node_type'] == TYPE_NFS:
                report(TaskStatus.SUCCESS, 'Created %s node(s) for %s(%s)')
            elif self.body['node_type'] == TYPE_NODE:
                report(TaskStatus.RUNNING, 'Adding %s node(s) to %s(%s)')
                vm_names = [spec['target_vm_name']
                            for spec in created['specs']]
                vapp.reload()
                join_cluster(self.config, vapp, template, vm_names)
                report(TaskStatus.SUCCESS,
                       'Added %s node(s) to cluster %s(%s)')
        except Exception as err:
            error_obj = error_to_json(err)
            LOGGER.error(traceback.format_exc())
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION])
            # Node-creation failures propagate to the caller; every other
            # error ends here.
            if isinstance(err, NodeCreationError):
                raise

    @exception_handler
    def delete_nodes(self, headers, body):
        """Accept a request to remove nodes from a cluster and start it.

        Rejects an empty node list and any node whose name starts with
        ``TYPE_MASTER``, looks the cluster up by ``body['name']``, publishes
        a RUNNING task and starts this thread.

        :param headers: request headers, handed to ``_connect_tenant``.
        :param dict body: request payload; reads ``name`` and ``nodes``.

        :return: dict with ``body`` (cluster name and task href) and
            ``status_code`` set to ``ACCEPTED``.

        :raises CseServerError: on an empty node list, a master node in the
            list, or a lookup that does not yield exactly one cluster.
        """
        self.cluster_name = body['name']
        LOGGER.debug('about to delete nodes from cluster with name: %s' %
                     body['name'])

        if len(body['nodes']) < 1:
            raise CseServerError('Invalid list of nodes: %s.' % body['nodes'])

        # Refuse the request at the first master node in the list.
        masters = (name for name in body['nodes']
                   if name.startswith(TYPE_MASTER))
        first_master = next(masters, None)
        if first_master is not None:
            raise CseServerError('Can\'t delete a master node: \'%s\'.' %
                                 first_master)

        self.tenant_info = self._connect_tenant(headers)
        matches = load_from_metadata(self.client_tenant,
                                     name=self.cluster_name)
        if len(matches) != 1:
            raise CseServerError('Cluster \'%s\' not found.' %
                                 self.cluster_name)

        # Record the request on the instance; the worker thread reads it.
        self.cluster = matches[0]
        self.headers = headers
        self.body = body
        self.op = OP_DELETE_NODES
        self._connect_sysadmin()
        self.cluster_id = self.cluster['cluster_id']

        progress = 'Deleting %s node(s) from cluster %s(%s)' % (
            len(body['nodes']), self.cluster_name, self.cluster_id)
        self.update_task(TaskStatus.RUNNING, message=progress)

        self.daemon = True
        self.start()

        return {
            'body': {
                'cluster_name': self.cluster_name,
                'task_href': self.task_resource.get('href'),
            },
            'status_code': ACCEPTED,
        }

    def delete_nodes_thread(self):
        """Remove the requested nodes from the cluster and delete their VMs.

        Steps, each announced through a RUNNING task update:

        1. remove the nodes from the cluster (best effort),
        2. undeploy each VM (best effort),
        3. delete the VMs from the vApp and wait for that task.

        Failures in steps 1 and 2 are logged together with the exception
        details and do not stop the operation. Any other failure is logged
        and reported as an ERROR task update; nothing is re-raised.
        """
        LOGGER.debug('about to delete nodes from cluster with name: %s',
                     self.cluster_name)
        try:
            vapp = VApp(self.client_tenant, href=self.cluster['vapp_href'])
            template = self.get_template()
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s node(s) from %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
            try:
                delete_nodes_from_cluster(self.config, vapp, template,
                                          self.body['nodes'],
                                          self.body['force'])
            except Exception:
                # Best effort: keep going, but record why the removal failed.
                LOGGER.error("Couldn't delete node %s from cluster:%s" %
                             (self.body['nodes'], self.cluster_name),
                             exc_info=True)
            self.update_task(
                TaskStatus.RUNNING,
                message='Undeploying %s node(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
            for vm_name in self.body['nodes']:
                vm = VM(self.client_tenant, resource=vapp.get_vm(vm_name))
                try:
                    task = vm.undeploy()
                    self.client_tenant.get_task_monitor().wait_for_status(task)
                except Exception:
                    # Best effort: the VM is deleted below regardless.
                    LOGGER.warning('couldn\'t undeploy VM %s' % vm_name,
                                   exc_info=True)
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s VM(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
            task = vapp.delete_vms(self.body['nodes'])
            self.client_tenant.get_task_monitor().wait_for_status(task)
            # Nodes are removed *from* the cluster; the earlier wording
            # ("to cluster") was copied from the add-nodes message.
            self.update_task(
                TaskStatus.SUCCESS,
                message='Deleted %s node(s) from cluster %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(e))

    def node_rollback(self, node_list):
        """Roll back a failed node creation by deleting the given nodes.

        Removing the nodes from the cluster and undeploying their VMs are
        both best effort: a failure is logged as a warning and the rollback
        continues. The VMs are then deleted from the vApp.

        :param list node_list: faulty nodes to be deleted
        """
        LOGGER.info('About to rollback nodes from cluster with name: %s' %
                    self.cluster_name)
        LOGGER.info('Node list to be deleted:%s' % node_list)

        cluster_vapp = VApp(self.client_tenant,
                            href=self.cluster['vapp_href'])
        node_template = self.get_template()

        try:
            delete_nodes_from_cluster(self.config, cluster_vapp,
                                      node_template, node_list, force=True)
        except Exception:
            LOGGER.warning("Couldn't delete node %s from cluster:%s" %
                           (node_list, self.cluster_name))

        for faulty_name in node_list:
            faulty_vm = VM(self.client_tenant,
                           resource=cluster_vapp.get_vm(faulty_name))
            try:
                faulty_vm.undeploy()
            except Exception:
                LOGGER.warning("Couldn't undeploy VM %s" % faulty_name)

        cluster_vapp.delete_vms(node_list)
        LOGGER.info('Successfully deleted nodes: %s' % node_list)

    def cluster_rollback(self):
        """Roll back a failed cluster creation by deleting the cluster vApp.

        Does nothing beyond a debug log entry when the lookup by
        ``self.cluster_name`` does not yield exactly one cluster.
        """
        LOGGER.info('About to rollback cluster with name: %s' %
                    self.cluster_name)
        matches = load_from_metadata(self.client_tenant,
                                     name=self.cluster_name)
        if len(matches) == 1:
            self.cluster = matches[0]
            owning_vdc = VDC(self.client_tenant,
                             href=self.cluster['vdc_href'])
            owning_vdc.delete_vapp(self.cluster['name'], force=True)
            LOGGER.info('Successfully deleted cluster: %s' %
                        self.cluster_name)
        else:
            LOGGER.debug('Cluster %s not found.' % self.cluster_name)
コード例 #13
0
class DefaultBroker(threading.Thread):
    """Broker that creates and deletes clusters as background operations.

    ``create_cluster`` and ``delete_cluster`` record the request on the
    instance, publish a task through the sysadmin client and start this
    thread; ``run`` then dispatches on ``self.op`` to the matching
    ``*_thread`` method, which reports progress through ``update_task``.
    """

    def __init__(self, config):
        """Keep the configuration and copy out the vCD connection settings.

        :param dict config: configuration; its ``vcd`` section must provide
            ``host``, ``username``, ``password``, ``api_version``, ``verify``
            and ``log``.
        """
        threading.Thread.__init__(self)
        self.config = config
        self.host = config['vcd']['host']
        self.username = config['vcd']['username']
        self.password = config['vcd']['password']
        self.version = config['vcd']['api_version']
        self.verify = config['vcd']['verify']
        self.log = config['vcd']['log']

    def _connect_sysadmin(self):
        """Create ``self.client_sysadmin`` with the configured credentials.

        When certificate verification is disabled a warning is logged and
        urllib3's own warnings are silenced.
        """
        if not self.verify:
            LOGGER.warning('InsecureRequestWarning: '
                           'Unverified HTTPS request is being made. '
                           'Adding certificate verification is strongly '
                           'advised.')
            requests.packages.urllib3.disable_warnings()
        self.client_sysadmin = Client(uri=self.host,
                                      api_version=self.version,
                                      verify_ssl_certs=self.verify,
                                      log_file='sysadmin.log',
                                      log_headers=True,
                                      log_bodies=True)
        self.client_sysadmin.set_credentials(
            BasicLoginCredentials(self.username, 'System', self.password))

    def _connect_tenant(self, headers):
        """Create ``self.client_tenant`` from the caller's session token.

        :param headers: request headers; ``x-vcloud-authorization`` supplies
            the token and the text after ``version=`` in ``Accept`` supplies
            the API version.

        :return: dict with ``user_name``, ``user_id``, ``org_name`` and
            ``org_href`` of the rehydrated session.
        """
        token = headers.get('x-vcloud-authorization')
        accept_header = headers.get('Accept')
        # NOTE(review): assumes the Accept header is present and contains
        # 'version='; otherwise this line raises.
        version = accept_header.split('version=')[1]
        self.client_tenant = Client(uri=self.host,
                                    api_version=version,
                                    verify_ssl_certs=self.verify,
                                    log_file='tenant.log',
                                    log_headers=True,
                                    log_bodies=True)
        session = self.client_tenant.rehydrate_from_token(token)
        return {
            'user_name':
            session.get('user'),
            'user_id':
            session.get('userId'),
            'org_name':
            session.get('org'),
            'org_href':
            self.client_tenant._get_wk_endpoint(
                _WellKnownEndpoint.LOGGED_IN_ORG)
        }

    def update_task(self, status, operation, message=None, error_message=None):
        """Create the task for this operation, or update the existing one.

        The ``Task`` helper is created on first use and the result of each
        update is kept in ``self.t`` so later calls address the same task.

        :param status: task status; its ``value`` attribute is sent.
        :param operation: operation identifier; also the key into
            ``OP_MESSAGE`` when ``message`` is omitted.
        :param str message: progress text; defaults to
            ``OP_MESSAGE[operation]``.
        :param str error_message: error text for a failed task.
        """
        if not hasattr(self, 'task'):
            self.task = Task(self.client_sysadmin)
        if message is None:
            message = OP_MESSAGE[operation]
        if hasattr(self, 't'):
            task_href = self.t.get('href')
        else:
            task_href = None
        self.t = self.task.update(status.value,
                                  'vcloud.cse',
                                  message,
                                  operation,
                                  '',
                                  None,
                                  'urn:cse:cluster:%s' % self.cluster_id,
                                  self.cluster_name,
                                  'application/vcloud.cse.cluster+xml',
                                  self.tenant_info['user_id'],
                                  self.tenant_info['user_name'],
                                  org_href=self.tenant_info['org_href'],
                                  task_href=task_href,
                                  error_message=error_message)

    @staticmethod
    def _error_text(error):
        """Return ``error.message`` when defined, otherwise ``str(error)``."""
        if hasattr(error, 'message'):
            return error.message
        return str(error)

    def is_valid_name(self, name):
        """Check a cluster name against the host-name pattern.

        A single trailing dot is ignored. Every dot-separated label must be
        1 to 63 characters of letters, digits or hyphens and must neither
        start nor end with a hyphen.

        :param str name: candidate cluster name.

        :return: ``True`` when the name is acceptable, ``False`` otherwise
            (including an empty name or one longer than
            ``MAX_HOST_NAME_LENGTH``).
        """
        # An empty name used to raise IndexError on name[-1] below.
        if not name or len(name) > MAX_HOST_NAME_LENGTH:
            return False
        if name[-1] == '.':
            name = name[:-1]
        allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?<!-)$", re.IGNORECASE)
        return all(allowed.match(x) for x in name.split("."))

    def get_template(self, name=None):
        """Return the configured template with the given name.

        :param str name: template name. When omitted, ``self.body['template']``
            is used if present and not ``None``, otherwise the broker's
            ``default_template``.

        :return: the matching entry of ``config['broker']['templates']``.

        :raises Exception: if no configured template has that name.
        """
        if name is None:
            if 'template' in self.body and self.body['template'] is not None:
                name = self.body['template']
            else:
                name = self.config['broker']['default_template']
        for template in self.config['broker']['templates']:
            if template['name'] == name:
                return template
        raise Exception('Template %s not found' % name)

    def run(self):
        """Thread entry point: run the worker matching ``self.op``.

        Operations other than cluster creation and deletion are ignored.
        """
        LOGGER.debug('thread started op=%s' % self.op)
        if self.op == OP_CREATE_CLUSTER:
            self.create_cluster_thread()
        elif self.op == OP_DELETE_CLUSTER:
            self.delete_cluster_thread()

    def list_clusters(self, headers, body):
        """List the clusters visible to the calling tenant.

        :param headers: request headers, handed to ``_connect_tenant``.
        :param body: unused.

        :return: dict with ``body`` (list of clusters) and ``status_code``.
            On failure ``body`` is empty, the status is
            ``INTERNAL_SERVER_ERROR`` and ``message`` holds the traceback.
        """
        result = {}
        try:
            result['body'] = []
            result['status_code'] = OK
            self._connect_tenant(headers)
            clusters = load_from_metadata(self.client_tenant,
                                          get_leader_ip=True)
            result['body'] = clusters
        except Exception:
            LOGGER.error(traceback.format_exc())
            result['body'] = []
            result['status_code'] = INTERNAL_SERVER_ERROR
            result['message'] = traceback.format_exc()
        return result

    def create_cluster(self, headers, body):
        """Accept a cluster-creation request and start it in the background.

        :param headers: request headers, handed to ``_connect_tenant``.
        :param dict body: request payload; reads ``name``, ``vdc``,
            ``node_count`` and ``storage_profile``.

        :return: dict with ``body`` and ``status_code``. On success the body
            holds ``name``, ``cluster_id`` and ``task_href`` and the status is
            ``ACCEPTED``; on failure the body holds a ``message`` and the
            status is ``INTERNAL_SERVER_ERROR``.
        """
        result = {}
        result['body'] = {}
        cluster_name = body['name']
        vdc_name = body['vdc']
        node_count = body['node_count']
        LOGGER.debug('about to create cluster %s on %s with %s nodes, sp=%s',
                     cluster_name, vdc_name, node_count,
                     body['storage_profile'])
        result['body'] = {'message': 'can\'t create cluster %s' % cluster_name}
        result['status_code'] = INTERNAL_SERVER_ERROR
        try:
            if not self.is_valid_name(cluster_name):
                raise Exception('Invalid cluster name')
            self.tenant_info = self._connect_tenant(headers)
            self.headers = headers
            self.body = body
            self.cluster_name = cluster_name
            self.cluster_id = str(uuid.uuid4())
            self.op = OP_CREATE_CLUSTER
            self._connect_sysadmin()
            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Creating cluster %s(%s)' %
                             (cluster_name, self.cluster_id))
            self.daemon = True
            self.start()
            response_body = {}
            response_body['name'] = self.cluster_name
            response_body['cluster_id'] = self.cluster_id
            response_body['task_href'] = self.t.get('href')
            result['body'] = response_body
            result['status_code'] = ACCEPTED
        except Exception as e:
            # Plain exceptions have no ``message`` attribute on Python 3, so
            # reading it unconditionally made this handler itself fail.
            result['body'] = {'message': self._error_text(e)}
            LOGGER.error(traceback.format_exc())
        return result

    def create_cluster_thread(self):
        """Build the cluster: vApp, metadata, master node and worker nodes.

        Progress is reported through RUNNING task updates and completion
        through a SUCCESS update. Any exception is logged and reported as an
        ERROR task update; nothing is re-raised.
        """
        try:
            # Read inside the try so that a missing key is reported on the
            # task instead of silently killing the thread.
            network_name = self.body['network']
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise Exception('Cluster already exists.')
            org_resource = self.client_tenant.get_org()
            org = Org(self.client_tenant, resource=org_resource)
            vdc_resource = org.get_vdc(self.body['vdc'])
            vdc = VDC(self.client_tenant, resource=vdc_resource)
            template = self.get_template()
            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Creating cluster vApp %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp_resource = vdc.create_vapp(self.cluster_name,
                                            description='cluster %s' %
                                            self.cluster_name,
                                            network=network_name,
                                            fence_mode='bridged')
            t = self.client_tenant.get_task_monitor().wait_for_status(
                task=vapp_resource.Tasks.Task[0],
                timeout=60,
                poll_frequency=2,
                fail_on_status=None,
                expected_target_statuses=[
                    TaskStatus.SUCCESS, TaskStatus.ABORTED, TaskStatus.ERROR,
                    TaskStatus.CANCELED
                ],
                callback=None)
            # Explicit check instead of ``assert``: asserts are stripped
            # under ``python -O`` and carry no message for the task.
            vapp_status = t.get('status').lower()
            if vapp_status != TaskStatus.SUCCESS.value:
                raise Exception(
                    'Creation of vApp for cluster %s ended with status %s' %
                    (self.cluster_name, vapp_status))
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.client_tenant, href=vapp_resource.get('href'))
            for k, v in tags.items():
                t = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.client_tenant.get_task_monitor().\
                    wait_for_status(
                        task=t,
                        timeout=600,
                        poll_frequency=5,
                        fail_on_status=None,
                        expected_target_statuses=[TaskStatus.SUCCESS],
                        callback=None)
            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Creating master node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            add_nodes(1,
                      template,
                      TYPE_MASTER,
                      self.config,
                      self.client_tenant,
                      org,
                      vdc,
                      vapp,
                      self.body,
                      wait=True)

            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Initializing cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))

            vapp.reload()
            init_cluster(self.config, vapp, template)

            master_ip = get_master_ip(self.config, vapp, template)
            t = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                  master_ip)
            self.client_tenant.get_task_monitor().\
                wait_for_status(
                    task=t,
                    timeout=600,
                    poll_frequency=5,
                    fail_on_status=None,
                    expected_target_statuses=[TaskStatus.SUCCESS],
                    callback=None)

            if self.body['node_count'] > 0:

                self.update_task(TaskStatus.RUNNING,
                                 self.op,
                                 message='Creating %s node(s) for %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                add_nodes(self.body['node_count'],
                          template,
                          TYPE_NODE,
                          self.config,
                          self.client_tenant,
                          org,
                          vdc,
                          vapp,
                          self.body,
                          wait=True)
                self.update_task(TaskStatus.RUNNING,
                                 self.op,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                vapp.reload()
                join_cluster(self.config, vapp, template)

            self.update_task(TaskStatus.SUCCESS,
                             self.op,
                             message='Created cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))

        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, self.op, error_message=str(e))

    def delete_cluster(self, headers, body):
        """Accept a cluster-deletion request and start it in the background.

        :param headers: request headers, handed to ``_connect_tenant``.
        :param dict body: request payload; ``body['name']`` is the cluster
            name.

        :return: dict with ``body`` and ``status_code``. On success the body
            holds ``cluster_name`` and ``task_href`` and the status is
            ``ACCEPTED``; on failure the body holds a ``message`` and the
            status is ``INTERNAL_SERVER_ERROR``.
        """
        result = {}
        result['body'] = {}
        LOGGER.debug('about to delete cluster with name: %s' % body['name'])
        result['status_code'] = INTERNAL_SERVER_ERROR
        try:
            self.cluster_name = body['name']
            self.tenant_info = self._connect_tenant(headers)
            self.headers = headers
            self.body = body
            self.op = OP_DELETE_CLUSTER
            self._connect_sysadmin()
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 1:
                raise Exception('Cluster %s not found.' % self.cluster_name)
            self.cluster = clusters[0]
            self.cluster_id = self.cluster['cluster_id']

            self.update_task(TaskStatus.RUNNING,
                             self.op,
                             message='Deleting cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            self.daemon = True
            self.start()
            response_body = {}
            response_body['cluster_name'] = self.cluster_name
            response_body['task_href'] = self.t.get('href')
            result['body'] = response_body
            result['status_code'] = ACCEPTED
        except Exception as e:
            result['body'] = {'message': self._error_text(e)}
            LOGGER.error(traceback.format_exc())
        return result

    def delete_cluster_thread(self):
        """Delete the cluster vApp and report the outcome through the task.

        Any exception is logged and reported as an ERROR task update;
        nothing is re-raised.
        """
        LOGGER.debug('about to delete cluster with name: %s',
                     self.cluster_name)
        try:
            vdc = VDC(self.client_tenant, href=self.cluster['vdc_href'])
            delete_task = vdc.delete_vapp(self.cluster['name'], force=True)
            self.client_tenant.get_task_monitor().\
                wait_for_status(
                    task=delete_task,
                    timeout=600,
                    poll_frequency=5,
                    fail_on_status=None,
                    expected_target_statuses=[TaskStatus.SUCCESS],
                    callback=None)
            self.update_task(TaskStatus.SUCCESS,
                             self.op,
                             message='Deleted cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            # update_task takes (status, operation, ...). The previous call
            # passed the cluster name and id first, which raised TypeError
            # and left the failure unreported.
            self.update_task(TaskStatus.ERROR, self.op, error_message=str(e))

    def get_cluster_config(self, cluster_name, headers):
        """Fetch the configuration of the named cluster.

        :param str cluster_name: name of the cluster to look up.
        :param headers: request headers, handed to ``_connect_tenant``.

        :return: dict with ``body`` and ``status_code``. On failure ``body``
            is the error text and the status is ``INTERNAL_SERVER_ERROR``.
        """
        result = {}
        try:
            self._connect_tenant(headers)
            clusters = load_from_metadata(self.client_tenant,
                                          name=cluster_name)
            if len(clusters) != 1:
                raise Exception('Cluster \'%s\' not found' % cluster_name)
            vapp = VApp(self.client_tenant, href=clusters[0]['vapp_href'])
            template = self.get_template(name=clusters[0]['template'])
            # Inside the method body this name resolves to the module-level
            # helper, not to this method.
            result['body'] = get_cluster_config(self.config, vapp,
                                                template['admin_password'])
            result['status_code'] = OK
        except Exception as e:
            result['body'] = str(e)
            result['status_code'] = INTERNAL_SERVER_ERROR
        return result
コード例 #14
0
class DefaultBroker(threading.Thread):
    """Thread subclass that keeps the vCD connection settings from config."""

    def __init__(self, config):
        """Store the configuration and cache the values of its 'vcd' section.

        :param dict config: configuration with a 'vcd' section providing
            'host', 'username', 'password', 'api_version', 'verify' and
            'log'.
        """
        super().__init__()
        self.config = config
        vcd_section = config['vcd']
        self.host = vcd_section['host']
        self.username = vcd_section['username']
        self.password = vcd_section['password']
        self.version = vcd_section['api_version']
        self.verify = vcd_section['verify']
        self.log = vcd_section['log']

    def _connect_sysadmin(self):
        """Create ``self.client_sysadmin`` with 'System' org credentials.

        When certificate verification is turned off, a warning is logged and
        urllib3 warnings are disabled before the client is built.
        """
        if not self.verify:
            warning_text = ('InsecureRequestWarning: '
                            'Unverified HTTPS request is being made. '
                            'Adding certificate verification is strongly '
                            'advised.')
            LOGGER.warning(warning_text)
            requests.packages.urllib3.disable_warnings()
        sysadmin = Client(uri=self.host,
                          api_version=self.version,
                          verify_ssl_certs=self.verify,
                          log_headers=True,
                          log_bodies=True)
        self.client_sysadmin = sysadmin
        credentials = BasicLoginCredentials(self.username, 'System',
                                            self.password)
        sysadmin.set_credentials(credentials)

    def _connect_tenant(self, headers):
        """Build ``self.client_tenant`` from the caller's session token.

        The API version is the text following 'version=' in the 'Accept'
        header.

        :param dict headers: request headers; reads 'x-vcloud-authorization'
            and 'Accept'.

        :return: dict with 'user_name', 'user_id', 'org_name' and 'org_href'
            taken from the rehydrated session.
        :rtype: dict
        """
        auth_token = headers.get('x-vcloud-authorization')
        api_version = headers.get('Accept').split('version=')[1]
        tenant_client = Client(uri=self.host,
                               api_version=api_version,
                               verify_ssl_certs=self.verify,
                               log_headers=True,
                               log_bodies=True)
        self.client_tenant = tenant_client
        session = tenant_client.rehydrate_from_token(auth_token)
        tenant_info = {}
        tenant_info['user_name'] = session.get('user')
        tenant_info['user_id'] = session.get('userId')
        tenant_info['org_name'] = session.get('org')
        tenant_info['org_href'] = tenant_client._get_wk_endpoint(
            _WellKnownEndpoint.LOGGED_IN_ORG)
        return tenant_info

    def _to_message(self, e):
        if hasattr(e, 'message'):
            return {'message': e.message}
        else:
            return {'message': str(e)}

    def update_task(self, status, message=None, error_message=None):
        if not hasattr(self, 'task'):
            self.task = Task(self.client_sysadmin)
        if message is None:
            message = OP_MESSAGE[self.op]
        if hasattr(self, 'task_resource'):
            task_href = self.task_resource.get('href')
        else:
            task_href = None
        self.task_resource = self.task.update(
            status.value,
            'vcloud.cse',
            message,
            self.op,
            '',
            None,
            'urn:cse:cluster:%s' % self.cluster_id,
            self.cluster_name,
            'application/vcloud.cse.cluster+xml',
            self.tenant_info['user_id'],
            self.tenant_info['user_name'],
            org_href=self.tenant_info['org_href'],
            task_href=task_href,
            error_message=error_message)

    def is_valid_name(self, name):
        """Validate that the cluster name against the pattern."""
        if len(name) > MAX_HOST_NAME_LENGTH:
            return False
        if name[-1] == '.':
            name = name[:-1]
        allowed = re.compile("(?!-)[A-Z\d-]{1,63}(?<!-)$", re.IGNORECASE)
        return all(allowed.match(x) for x in name.split("."))

    def get_template(self, name=None):
        if name is None:
            if 'template' in self.body and self.body['template'] is not None:
                name = self.body['template']
            else:
                name = self.config['broker']['default_template']
        for template in self.config['broker']['templates']:
            if template['name'] == name:
                return template
        raise Exception('Template %s not found' % name)

    def run(self):
        """Thread entry point: run the worker matching ``self.op``.

        Nothing further happens when ``self.op`` equals none of the four
        known operations.
        """
        LOGGER.debug('thread started op=%s' % self.op)
        # Checked in order; the first matching operation wins.
        workers = (
            (OP_CREATE_CLUSTER, self.create_cluster_thread),
            (OP_DELETE_CLUSTER, self.delete_cluster_thread),
            (OP_CREATE_NODES, self.create_nodes_thread),
            (OP_DELETE_NODES, self.delete_nodes_thread),
        )
        for operation, worker in workers:
            if self.op == operation:
                worker()
                break

    def list_clusters(self, headers, body):
        """List the clusters visible to the tenant.

        :param dict headers: request headers used to connect as the tenant.
        :param body: request body (not used here).

        :return: dict with 'body' (list of clusters) and 'status_code'; on
            failure 'body' is empty, 'status_code' is INTERNAL_SERVER_ERROR
            and 'message' holds the formatted traceback.
        :rtype: dict
        """
        response = {'body': [], 'status_code': OK}
        try:
            self._connect_tenant(headers)
            response['body'] = load_from_metadata(self.client_tenant)
        except Exception:
            trace = traceback.format_exc()
            LOGGER.error(trace)
            response['body'] = []
            response['status_code'] = INTERNAL_SERVER_ERROR
            response['message'] = trace
        return response

    def get_cluster_info(self, name, headers, body):
        """Return the named cluster's metadata enriched with per-VM details.

        Each VM of the cluster vApp is described by name, CPU count, memory,
        status and primary IP, and appended to the cluster's 'master_nodes'
        or 'nodes' list depending on its name prefix.

        :param str name: name of the cluster.
        :param dict headers: request headers used to connect as the tenant.
        :param body: request body (not used here).

        :return: dict with 'body' and 'status_code'; on failure 'body' is
            empty, 'status_code' is INTERNAL_SERVER_ERROR and 'message'
            holds the error text.
        :rtype: dict
        """
        response = {'body': [], 'status_code': OK}
        try:
            self._connect_tenant(headers)
            matches = load_from_metadata(self.client_tenant, name=name)
            if not matches:
                raise Exception('Cluster \'%s\' not found.' % name)
            cluster = matches[0]
            vapp = VApp(self.client_tenant, href=cluster['vapp_href'])
            for vm in vapp.get_all_vms():
                vm_name = vm.get('name')
                node_info = {
                    'name': vm_name,
                    'numberOfCpus': '',
                    'memoryMB': '',
                    'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
                    'ipAddress': ''
                }
                if hasattr(vm, 'VmSpecSection'):
                    spec = vm.VmSpecSection
                    node_info['numberOfCpus'] = spec.NumCpus.text
                    node_info['memoryMB'] = \
                        spec.MemoryResourceMb.Configured.text
                # The IP lookup is best effort; a failure leaves '' in place.
                try:
                    node_info['ipAddress'] = vapp.get_primary_ip(vm_name)
                except Exception:
                    LOGGER.debug('cannot get ip address for node %s' %
                                 vm_name)
                if vm_name.startswith(TYPE_MASTER):
                    node_info['node_type'] = 'master'
                    cluster.get('master_nodes').append(node_info)
                elif vm_name.startswith(TYPE_NODE):
                    node_info['node_type'] = 'node'
                    cluster.get('nodes').append(node_info)
            response['body'] = cluster
        except Exception as err:
            LOGGER.error(traceback.format_exc())
            response['body'] = []
            response['status_code'] = INTERNAL_SERVER_ERROR
            response['message'] = str(err)
        return response

    def create_cluster(self, headers, body):
        """Validate a create-cluster request and start the worker thread.

        :param dict headers: request headers used to connect as the tenant.
        :param dict body: request body; reads 'name', 'vdc', 'node_count'
            and 'storage_profile' here.

        :return: dict with 'status_code' and 'body'. On success the status
            is ACCEPTED and the body holds 'name', 'cluster_id' and
            'task_href'; otherwise the status is INTERNAL_SERVER_ERROR and
            the body holds a 'message'.
        :rtype: dict
        """
        cluster_name = body['name']
        LOGGER.debug('about to create cluster %s on %s with %s nodes, sp=%s',
                     cluster_name, body['vdc'], body['node_count'],
                     body['storage_profile'])
        # Start from the failure response; it is replaced on success.
        result = {
            'body': {
                'message': 'can\'t create cluster \'%s\'' % cluster_name
            },
            'status_code': INTERNAL_SERVER_ERROR,
        }
        try:
            if not self.is_valid_name(cluster_name):
                raise Exception('Invalid cluster name')
            self.tenant_info = self._connect_tenant(headers)
            self.headers = headers
            self.body = body
            self.cluster_name = cluster_name
            self.cluster_id = str(uuid.uuid4())
            self.op = OP_CREATE_CLUSTER
            self._connect_sysadmin()
            progress = 'Creating cluster %s(%s)' % (cluster_name,
                                                    self.cluster_id)
            self.update_task(TaskStatus.RUNNING, message=progress)
            self.daemon = True
            self.start()
            result['body'] = {
                'name': self.cluster_name,
                'cluster_id': self.cluster_id,
                'task_href': self.task_resource.get('href'),
            }
            result['status_code'] = ACCEPTED
        except Exception as err:
            result['body'] = self._to_message(err)
            LOGGER.error(traceback.format_exc())
        return result

    def create_cluster_thread(self):
        """Create the cluster vApp, its master node and its worker nodes.

        Steps, each reported on the task as RUNNING: check that no cluster
        with this name exists, create the vApp on the requested VDC and
        network, tag it with CSE metadata, add and initialize the master
        node, store the master IP in metadata, then add and join
        ``node_count`` worker nodes when that count is greater than zero.
        The task ends as SUCCESS, or as ERROR carrying the exception text if
        any step fails.
        """
        try:
            # Read inside the try block so that a request body without a
            # 'network' key is reported on the task as ERROR instead of
            # ending the thread with the task left in RUNNING.
            network_name = self.body['network']
            clusters = load_from_metadata(self.client_tenant,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise Exception('Cluster already exists.')
            org_resource = self.client_tenant.get_org()
            org = Org(self.client_tenant, resource=org_resource)
            vdc_resource = org.get_vdc(self.body['vdc'])
            vdc = VDC(self.client_tenant, resource=vdc_resource)
            template = self.get_template()
            self.update_task(TaskStatus.RUNNING,
                             message='Creating cluster vApp %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp_resource = vdc.create_vapp(self.cluster_name,
                                            description='cluster %s' %
                                            self.cluster_name,
                                            network=network_name,
                                            fence_mode='bridged')
            self.client_tenant.get_task_monitor().wait_for_status(
                vapp_resource.Tasks.Task[0])
            # Metadata written on the vApp, one entry at a time.
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.client_tenant, href=vapp_resource.get('href'))
            for k, v in tags.items():
                task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.client_tenant.get_task_monitor().wait_for_status(task)
            self.update_task(TaskStatus.RUNNING,
                             message='Creating master node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            add_nodes(1, template, TYPE_MASTER, self.config,
                      self.client_tenant, org, vdc, vapp, self.body)
            self.update_task(TaskStatus.RUNNING,
                             message='Initializing cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            init_cluster(self.config, vapp, template)
            master_ip = get_master_ip(self.config, vapp, template)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.client_tenant.get_task_monitor().wait_for_status(task)
            if self.body['node_count'] > 0:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating %s node(s) for %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                add_nodes(self.body['node_count'], template, TYPE_NODE,
                          self.config, self.client_tenant, org, vdc, vapp,
                          self.body)
                self.update_task(TaskStatus.RUNNING,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                vapp.reload()
                join_cluster(self.config, vapp, template)
            self.update_task(TaskStatus.SUCCESS,
                             message='Created cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(e))

    def delete_cluster(self, headers, body):
        """Validate a delete-cluster request and start the worker thread.

        :param dict headers: request headers used to connect as the tenant.
        :param dict body: request body; reads 'name'.

        :return: dict with 'status_code' and 'body'. On success the status
            is ACCEPTED and the body holds 'cluster_name' and 'task_href';
            otherwise the status is INTERNAL_SERVER_ERROR and the body holds
            a 'message'.
        :rtype: dict
        """
        LOGGER.debug('about to delete cluster with name: %s' % body['name'])
        result = {'body': {}, 'status_code': INTERNAL_SERVER_ERROR}
        try:
            self.cluster_name = body['name']
            self.tenant_info = self._connect_tenant(headers)
            self.headers = headers
            self.body = body
            self.op = OP_DELETE_CLUSTER
            self._connect_sysadmin()
            matches = load_from_metadata(self.client_tenant,
                                         name=self.cluster_name)
            # Exactly one match is required.
            if len(matches) != 1:
                raise Exception('Cluster %s not found.' % self.cluster_name)
            self.cluster = matches[0]
            self.cluster_id = self.cluster['cluster_id']
            progress = 'Deleting cluster %s(%s)' % (self.cluster_name,
                                                    self.cluster_id)
            self.update_task(TaskStatus.RUNNING, message=progress)
            self.daemon = True
            self.start()
            result['body'] = {
                'cluster_name': self.cluster_name,
                'task_href': self.task_resource.get('href'),
            }
            result['status_code'] = ACCEPTED
        except Exception as err:
            result['body'] = self._to_message(err)
            LOGGER.error(traceback.format_exc())
        return result

    def delete_cluster_thread(self):
        """Force-delete the cluster vApp and report the outcome on the task.

        The task ends as SUCCESS once the deletion task has been waited on,
        or as ERROR carrying the exception text if anything fails.
        """
        LOGGER.debug('about to delete cluster with name: %s',
                     self.cluster_name)
        try:
            cluster_vdc = VDC(self.client_tenant,
                              href=self.cluster['vdc_href'])
            removal = cluster_vdc.delete_vapp(self.cluster['name'],
                                              force=True)
            monitor = self.client_tenant.get_task_monitor()
            monitor.wait_for_status(removal)
            done = 'Deleted cluster %s(%s)' % (self.cluster_name,
                                               self.cluster_id)
            self.update_task(TaskStatus.SUCCESS, message=done)
        except Exception as err:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(err))

    def get_cluster_config(self, cluster_name, headers):
        """Fetch the configuration of the named cluster.

        :param str cluster_name: name of the cluster to look up.
        :param dict headers: request headers used to connect as the tenant.

        :return: dict with 'body' and 'status_code'; on failure 'body' is a
            ``{'message': ...}`` dict and 'status_code' is
            INTERNAL_SERVER_ERROR.
        :rtype: dict
        """
        response = {}
        try:
            self._connect_tenant(headers)
            matches = load_from_metadata(self.client_tenant,
                                         name=cluster_name)
            # Exactly one match is required.
            if len(matches) != 1:
                raise Exception('Cluster \'%s\' not found' % cluster_name)
            cluster = matches[0]
            vapp = VApp(self.client_tenant, href=cluster['vapp_href'])
            template = self.get_template(name=cluster['template'])
            response['body'] = get_cluster_config(
                self.config, vapp, template['admin_password'])
            response['status_code'] = OK
        except Exception as err:
            response['body'] = self._to_message(err)
            response['status_code'] = INTERNAL_SERVER_ERROR
        return response

    def create_nodes(self, headers, body):
        """Validate an add-nodes request and start the worker thread.

        :param dict headers: request headers used to connect as the tenant.
        :param dict body: request body; reads 'name', 'node_count', 'vdc'
            and 'storage_profile' here.

        :return: dict with 'status_code' and 'body'. On success the status
            is ACCEPTED and the body holds 'cluster_name' and 'task_href';
            otherwise the status is INTERNAL_SERVER_ERROR and the body holds
            a 'message'.
        :rtype: dict
        """
        self.cluster_name = body['name']
        node_count = body['node_count']
        LOGGER.debug('about to add %s nodes to cluster %s on VDC %s, sp=%s',
                     node_count, self.cluster_name, body['vdc'],
                     body['storage_profile'])
        result = {'body': {}, 'status_code': INTERNAL_SERVER_ERROR}
        try:
            if node_count < 1:
                raise Exception('Invalid node count: %s.' % node_count)
            self.tenant_info = self._connect_tenant(headers)
            matches = load_from_metadata(self.client_tenant,
                                         name=self.cluster_name)
            # Exactly one match is required.
            if len(matches) != 1:
                raise Exception('Cluster \'%s\' not found.' %
                                self.cluster_name)
            self.cluster = matches[0]
            self.headers = headers
            self.body = body
            self.op = OP_CREATE_NODES
            self._connect_sysadmin()
            self.cluster_id = self.cluster['cluster_id']
            progress = 'Adding %s node(s) to cluster %s(%s)' % (
                node_count, self.cluster_name, self.cluster_id)
            self.update_task(TaskStatus.RUNNING, message=progress)
            self.daemon = True
            self.start()
            result['body'] = {
                'cluster_name': self.cluster_name,
                'task_href': self.task_resource.get('href'),
            }
            result['status_code'] = ACCEPTED
        except Exception as err:
            result['body'] = self._to_message(err)
            LOGGER.error(traceback.format_exc())
        return result

    def create_nodes_thread(self):
        """Add worker nodes to the cluster vApp and join them to the cluster.

        Progress is reported on the task as RUNNING; the task ends as
        SUCCESS, or as ERROR carrying the exception text if anything fails.
        """
        LOGGER.debug('about to add nodes to cluster with name: %s',
                     self.cluster_name)
        try:
            tenant = self.client_tenant
            org = Org(tenant, resource=tenant.get_org())
            vdc = VDC(tenant, href=self.cluster['vdc_href'])
            vapp = VApp(tenant, href=self.cluster['vapp_href'])
            template = self.get_template()
            self.update_task(
                TaskStatus.RUNNING,
                message='Creating %s node(s) for %s(%s)' %
                (self.body['node_count'], self.cluster_name, self.cluster_id))
            created = add_nodes(self.body['node_count'], template, TYPE_NODE,
                                self.config, tenant, org, vdc, vapp,
                                self.body)
            self.update_task(
                TaskStatus.RUNNING,
                message='Adding %s node(s) to %s(%s)' %
                (self.body['node_count'], self.cluster_name, self.cluster_id))
            # Only the newly created VMs are joined to the cluster.
            target_nodes = [spec['target_vm_name']
                            for spec in created['specs']]
            vapp.reload()
            join_cluster(self.config, vapp, template, target_nodes)
            self.update_task(
                TaskStatus.SUCCESS,
                message='Added %s node(s) to cluster %s(%s)' %
                (self.body['node_count'], self.cluster_name, self.cluster_id))
        except Exception as err:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(err))

    def delete_nodes(self, headers, body):
        """Validate a delete-nodes request and start the worker thread.

        The request is rejected when the node list is empty or names a node
        whose name starts with the master prefix.

        :param dict headers: request headers used to connect as the tenant.
        :param dict body: request body; reads 'name' and 'nodes'.

        :return: dict with 'status_code' and 'body'. On success the status
            is ACCEPTED and the body holds 'cluster_name' and 'task_href';
            otherwise the status is INTERNAL_SERVER_ERROR and the body holds
            a 'message'.
        :rtype: dict
        """
        self.cluster_name = body['name']
        LOGGER.debug('about to delete nodes from cluster with name: %s' %
                     body['name'])
        result = {'body': {}, 'status_code': INTERNAL_SERVER_ERROR}
        try:
            if len(body['nodes']) < 1:
                raise Exception('Invalid list of nodes: %s.' % body['nodes'])
            # Find the first requested node that is a master, if any.
            master = next((node for node in body['nodes']
                           if node.startswith(TYPE_MASTER)), None)
            if master is not None:
                raise Exception('Can\'t delete a master node: \'%s\'.' %
                                master)
            self.tenant_info = self._connect_tenant(headers)
            matches = load_from_metadata(self.client_tenant,
                                         name=self.cluster_name)
            # Exactly one match is required.
            if len(matches) != 1:
                raise Exception('Cluster \'%s\' not found.' %
                                self.cluster_name)
            self.cluster = matches[0]
            self.headers = headers
            self.body = body
            self.op = OP_DELETE_NODES
            self._connect_sysadmin()
            self.cluster_id = self.cluster['cluster_id']
            progress = 'Deleting %s node(s) from cluster %s(%s)' % (
                len(body['nodes']), self.cluster_name, self.cluster_id)
            self.update_task(TaskStatus.RUNNING, message=progress)
            self.daemon = True
            self.start()
            result['body'] = {
                'cluster_name': self.cluster_name,
                'task_href': self.task_resource.get('href'),
            }
            result['status_code'] = ACCEPTED
        except Exception as err:
            result['body'] = self._to_message(err)
            LOGGER.error(traceback.format_exc())
        return result

    def delete_nodes_thread(self):
        """Remove the requested nodes from the cluster and delete their VMs.

        The nodes are first removed from the cluster, then each VM is
        undeployed (best effort) and finally all VMs are deleted from the
        vApp. Progress is reported on the task as RUNNING; the task ends as
        SUCCESS, or as ERROR carrying the exception text if anything other
        than an undeploy fails.
        """
        LOGGER.debug('about to delete nodes from cluster with name: %s',
                     self.cluster_name)
        try:
            vapp = VApp(self.client_tenant, href=self.cluster['vapp_href'])
            template = self.get_template()
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s node(s) from %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
            delete_nodes_from_cluster(self.config, vapp, template,
                                      self.body['nodes'], self.body['force'])
            self.update_task(
                TaskStatus.RUNNING,
                message='Undeploying %s node(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
            for vm_name in self.body['nodes']:
                vm = VM(self.client_tenant, resource=vapp.get_vm(vm_name))
                try:
                    task = vm.undeploy()
                    self.client_tenant.get_task_monitor().wait_for_status(task)
                except Exception as e:
                    # Undeploy is best effort: keep going, but record why it
                    # failed instead of discarding the error.
                    LOGGER.warning('couldn\'t undeploy VM %s: %s',
                                   vm_name, e)
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s VM(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
            task = vapp.delete_vms(self.body['nodes'])
            self.client_tenant.get_task_monitor().wait_for_status(task)
            # The nodes were removed *from* the cluster, not added to it.
            self.update_task(
                TaskStatus.SUCCESS,
                message='Deleted %s node(s) from cluster %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(e))
コード例 #15
0
def _update_ovdc_using_placement_policy_async(operation_context: ctx.OperationContext,  # noqa: E501
                                              task: vcd_task.Task,
                                              task_href,
                                              user_href,
                                              policy_list,
                                              ovdc_id,
                                              vdc,
                                              remove_cp_from_vms_on_disable=False):  # noqa: E501
    """Enable ovdc using placement policies.

    Compares ``policy_list`` with the CSE placement policies currently on
    the OVDC, adds the missing ones and removes the extra ones, updating the
    task before each change. The task ends as SUCCESS, or as ERROR carrying
    the exception text if anything fails. Telemetry is recorded for the
    added and removed runtimes, and the operation context is ended in all
    cases when it holds a sysadmin client.

    :param ctx.OperationContext operation_context: operation context object
    :param vcd_task.Task task: Task resource to track progress
    :param str task_href: href of the task
    :param str user_href:
    :param List[str] policy_list: The new list of policies associated with
        the ovdc
    :param str ovdc_id:
    :param pyvcloud.vcd.vdc.VDC vdc: VDC object
    :param bool remove_cp_from_vms_on_disable: Set to true if placement
        policies need to be removed from the vms before removing from the VDC.
    """
    operation_name = "Update OVDC with placement policies"
    k8s_runtimes_added = ''
    k8s_runtimes_deleted = ''

    def _update_task(status, operation, details='', **extra):
        # Every task update in this function shares the same namespace,
        # owner and user fields; only the status, the texts and the optional
        # error message vary.
        return task.update(status=status.value,
                           namespace='vcloud.cse',
                           operation=operation,
                           operation_name=operation_name,
                           details=details,
                           progress=None,
                           owner_href=vdc.href,
                           owner_name=vdc.name,
                           owner_type=vcd_client.EntityType.VDC.value,
                           user_href=user_href,
                           user_name=operation_context.user.name,
                           task_href=task_href,
                           org_href=operation_context.user.org_href,
                           **extra)

    try:
        config = utils.get_server_runtime_config()
        log_wire = utils.str_to_bool(config.get('service', {}).get('log_wire'))
        cpm = compute_policy_manager.ComputePolicyManager(
            operation_context.sysadmin_client, log_wire=log_wire)
        existing_policies = [
            cse_policy['display_name'] for cse_policy in
            compute_policy_manager.list_cse_placement_policies_on_vdc(cpm, ovdc_id)  # noqa: E501
        ]

        logger.SERVER_LOGGER.debug(policy_list)
        logger.SERVER_LOGGER.debug(existing_policies)
        policies_to_add = set(policy_list) - set(existing_policies)
        policies_to_delete = set(existing_policies) - set(policy_list)

        # Telemetry for the runtimes being added (recorded as OVDC_ENABLE)
        # TODO: Update telemetry request to handle 'k8s_runtime' array
        k8s_runtimes_added = ','.join(policies_to_add)
        if k8s_runtimes_added:
            cse_params = {
                RequestKey.K8S_PROVIDER: k8s_runtimes_added,
                RequestKey.OVDC_ID: ovdc_id,
            }
            telemetry_handler.record_user_action_details(cse_operation=CseOperation.OVDC_ENABLE,  # noqa: E501
                                                         cse_params=cse_params)

        # Telemetry for the runtimes being removed (recorded as OVDC_DISABLE)
        # TODO: Update telemetry request to handle 'k8s_runtime' array
        # Joined with ',' so the value uses the same separator as the added
        # runtimes above (it was '.' before).
        k8s_runtimes_deleted = ','.join(policies_to_delete)
        if k8s_runtimes_deleted:
            cse_params = {
                RequestKey.K8S_PROVIDER: k8s_runtimes_deleted,
                RequestKey.OVDC_ID: ovdc_id,
                RequestKey.REMOVE_COMPUTE_POLICY_FROM_VMS: remove_cp_from_vms_on_disable  # noqa: E501
            }
            telemetry_handler.record_user_action_details(cse_operation=CseOperation.OVDC_DISABLE,  # noqa: E501
                                                         cse_params=cse_params)

        for cp_name in policies_to_add:
            msg = f"Adding k8s provider {cp_name} to OVDC {vdc.name}"
            logger.SERVER_LOGGER.debug(msg)
            _update_task(vcd_client.TaskStatus.RUNNING, msg)
            policy = compute_policy_manager.get_cse_vdc_compute_policy(
                cpm,
                cp_name,
                is_placement_policy=True)
            cpm.add_compute_policy_to_vdc(vdc_id=ovdc_id,
                                          compute_policy_href=policy['href'])

        for cp_name in policies_to_delete:
            msg = f"Removing k8s provider {RUNTIME_INTERNAL_NAME_TO_DISPLAY_NAME_MAP[cp_name]} from OVDC {ovdc_id}"  # noqa: E501
            logger.SERVER_LOGGER.debug(msg)
            # The updated task resource is handed to the removal call below.
            task_resource = _update_task(vcd_client.TaskStatus.RUNNING, msg)
            policy = compute_policy_manager.get_cse_vdc_compute_policy(
                cpm,
                cp_name,
                is_placement_policy=True)
            cpm.remove_compute_policy_from_vdc_sync(
                vdc=vdc,
                compute_policy_href=policy['href'],
                force=remove_cp_from_vms_on_disable,
                is_placement_policy=True,
                task_resource=task_resource)
        msg = f"Successfully updated OVDC: {vdc.name}"
        logger.SERVER_LOGGER.debug(msg)
        _update_task(vcd_client.TaskStatus.SUCCESS,
                     "Operation success",
                     details=msg)
        # Record telemetry
        if k8s_runtimes_added:
            telemetry_handler.record_user_action(CseOperation.OVDC_ENABLE,
                                                 status=OperationStatus.SUCCESS)  # noqa: E501
        if k8s_runtimes_deleted:
            telemetry_handler.record_user_action(CseOperation.OVDC_DISABLE,
                                                 status=OperationStatus.SUCCESS)  # noqa: E501
    except Exception as err:
        # Record telemetry
        if k8s_runtimes_added:
            telemetry_handler.record_user_action(CseOperation.OVDC_ENABLE,
                                                 status=OperationStatus.FAILED)
        if k8s_runtimes_deleted:
            telemetry_handler.record_user_action(CseOperation.OVDC_DISABLE,
                                                 status=OperationStatus.FAILED)
        logger.SERVER_LOGGER.error(err)
        _update_task(vcd_client.TaskStatus.ERROR,
                     'Failed to update OVDC',
                     details=f'Failed with error: {err}',
                     error_message=f"{err}")
    finally:
        if operation_context.sysadmin_client:
            operation_context.end()
コード例 #16
0
def _reload_templates_async(op_ctx, task_href):
    """Reload template definitions and report progress on a vCD task.

    Native templates are read from the catalog unless the server is in
    "no VC communication" mode, in which case the native template list is
    set to empty. TKG templates are always read afterwards. Each stage is
    reported on the task identified by ``task_href``.

    Any exception is logged; the task is moved to ERROR only if the task
    object, the user context and the user href were all obtained before the
    failure. ``op_ctx.end()`` is always called.

    :param op_ctx: operation context; provides ``get_user_context`` and
        ``end``
    :param task_href: href of the task to update
    """
    user_context = None
    task = None
    user_href = None

    def _report(status, operation):
        # All task updates in this function differ only in the status and
        # the operation text; everything else comes from the user context.
        task.update(status=status.value,
                    namespace='vcloud.cse',
                    operation=operation,
                    operation_name='template reload',
                    details='',
                    progress=None,
                    owner_href=user_context.org_href,
                    owner_name=user_context.org_name,
                    owner_type='application/vnd.vmware.vcloud.org+xml',
                    user_href=user_href,
                    user_name=user_context.name,
                    org_href=user_context.org_href,
                    task_href=task_href)

    try:
        user_context = op_ctx.get_user_context(api_version=None)
        user_client = user_context.client
        org = vcd_utils.get_org(user_client, user_context.org_name)
        user_href = org.get_user(user_context.name).get('href')
        task = Task(user_client)

        server_config = server_utils.get_server_runtime_config()
        if server_utils.is_no_vc_communication_mode():
            skip_msg = "Skipping loading k8s template definition from catalog " \
                       "since `No communication with VCenter` mode is on."
            logger.SERVER_LOGGER.info(skip_msg)
            server_config.set_value_at('broker.templates', [])
            _report(TaskStatus.RUNNING, skip_msg)
        else:
            native_templates = \
                template_reader.read_native_template_definition_from_catalog(
                    config=server_config
                )
            server_config.set_value_at('broker.templates', native_templates)
            _report(TaskStatus.RUNNING, "Finished reloading native templates.")

        _report(TaskStatus.RUNNING, "Reloading TKG templates.")
        tkgm_templates = \
            template_reader.read_tkgm_template_definition_from_catalog(
                config=server_config
            )
        server_config.set_value_at('broker.tkgm_templates', tkgm_templates)
        _report(TaskStatus.SUCCESS, "Finished reloading all templates.")
    except Exception:
        failure_msg = "Error reloading templates."
        logger.SERVER_LOGGER.error(failure_msg, exc_info=True)
        # The task can only be updated if setup got far enough to build it.
        if task and user_context and user_href:
            _report(TaskStatus.ERROR, failure_msg)
    finally:
        op_ctx.end()
コード例 #17
0
    def remove_compute_policy_from_vdc_sync(self,
                                            vdc,
                                            compute_policy_href,
                                            force=False,
                                            is_placement_policy=False,
                                            task_resource=None):
        """Remove compute policy from vdc.

        This method makes use of an umbrella task which can be used for
        tracking progress. If the umbrella task is not specified, it is
        created.

        When ``force`` is set, the policy is first detached from every VM in
        the VDC that currently uses it, waiting for each VM-level task to
        succeed, and only then removed from the VDC itself.

        :param pyvcloud.vcd.vdc.VDC vdc: VDC object
        :param str compute_policy_href: href of the compute policy to remove
        :param bool force: Force remove compute policy from vms in the VDC
            as well
        :param bool is_placement_policy: if True, VMs are matched by their
            placement policy id and the placement policy is removed from
            them; otherwise VMs are matched by their sizing policy id and
            are switched to the system default policy
        :param lxml.objectify.Element task_resource: Task resource for
            the umbrella task

        :raises Exception: any error is logged and re-raised; the task is
            set to ERROR only when it was created by this method (i.e. it is
            not an umbrella task)
        """
        user_name = self._session.get('user')

        task = Task(self._sysadmin_client)
        # NOTE(review): this initial value is never read; task_href is
        # reassigned unconditionally after the if/else below.
        task_href = None
        is_umbrella_task = task_resource is not None
        # Create a task if not umbrella task
        if not is_umbrella_task:
            # TODO the following org will be associated with 'System' org.
            # task created should be associated with the corresponding org of
            # the vdc object.
            org = vcd_utils.get_org(self._sysadmin_client)
            org.reload()
            user_href = org.get_user(user_name).get('href')
            org_href = org.href
            # NOTE(review): the message labels the value "vdc id" but
            # interpolates vdc.name.
            task_resource = task.update(
                status=vcd_client.TaskStatus.RUNNING.value,
                namespace='vcloud.cse',
                operation=
                f"Removing compute policy (href: {compute_policy_href})"  # noqa: E501
                f" from org VDC (vdc id: {vdc.name})",
                operation_name='Remove org VDC compute policy',
                details='',
                progress=None,
                owner_href=vdc.href,
                owner_name=vdc.name,
                owner_type=vcd_client.EntityType.VDC.value,
                user_href=user_href,
                user_name=user_name,
                org_href=org.href)
        else:
            # Reuse the user and org recorded on the caller's umbrella task.
            user_href = task_resource.User.get('href')
            org_href = task_resource.Organization.get('href')

        task_href = task_resource.get('href')

        try:
            # remove the compute policy from VMs if force is True
            if force:
                compute_policy_id = retrieve_compute_policy_id_from_href(
                    compute_policy_href)  # noqa: E501
                vdc_id = vcd_utils.extract_id(vdc.get_resource().get('id'))
                vapps = vcd_utils.get_all_vapps_in_ovdc(
                    client=self._sysadmin_client, ovdc_id=vdc_id)
                target_vms = []
                system_default_href = None
                operation_msg = None
                # Look up the href of the system default policy on this VDC.
                # NOTE(review): system_default_href stays None if no policy
                # with that name is listed; it is then passed as-is to
                # vm.update_compute_policy below.
                for cp_dict in self.list_compute_policies_on_vdc(vdc_id):
                    if cp_dict['name'] == _SYSTEM_DEFAULT_COMPUTE_POLICY:
                        system_default_href = cp_dict['href']
                        break
                # Collect the VMs that currently use the policy being removed.
                if is_placement_policy:
                    for vapp in vapps:
                        target_vms += \
                            [vm for vm in vapp.get_all_vms()
                                if self._get_vm_placement_policy_id(vm) == compute_policy_id] # noqa: E501
                    vm_names = [vm.get('name') for vm in target_vms]
                    operation_msg = f"Removing placement policy from " \
                                    f"{len(vm_names)} VMs. " \
                                    f"Affected VMs: {vm_names}"
                else:
                    for vapp in vapps:
                        target_vms += \
                            [vm for vm in vapp.get_all_vms()
                                if self._get_vm_sizing_policy_id(vm) == compute_policy_id] # noqa: E501
                    vm_names = [vm.get('name') for vm in target_vms]
                    operation_msg = "Setting sizing policy to " \
                                    f"'{_SYSTEM_DEFAULT_COMPUTE_POLICY}' on " \
                                    f"{len(vm_names)} VMs. " \
                                    f"Affected VMs: {vm_names}"

                # Announce the list of affected VMs before touching them.
                task.update(status=vcd_client.TaskStatus.RUNNING.value,
                            namespace='vcloud.cse',
                            operation=operation_msg,
                            operation_name='Remove org VDC compute policy',
                            details='',
                            progress=None,
                            owner_href=vdc.href,
                            owner_name=vdc.name,
                            owner_type=vcd_client.EntityType.VDC.value,
                            user_href=user_href,
                            user_name=user_name,
                            task_href=task_href,
                            org_href=org_href)

                task_monitor = self._sysadmin_client.get_task_monitor()
                # Process VMs one at a time, waiting for each VM-level task
                # to succeed before moving on.
                for vm_resource in target_vms:
                    vm = VM(self._sysadmin_client,
                            href=vm_resource.get('href'))
                    _task = None
                    operation_msg = None
                    if is_placement_policy:
                        # If the VM has a ComputePolicy element without a
                        # VmSizingPolicy child, assign the system default
                        # policy first, then remove the placement policy.
                        if hasattr(vm_resource, 'ComputePolicy') and \
                                not hasattr(vm_resource.ComputePolicy, 'VmSizingPolicy'):  # noqa: E501
                            # Updating sizing policy for the VM
                            _task = vm.update_compute_policy(
                                compute_policy_href=system_default_href)
                            operation_msg = \
                                "Setting compute policy to " \
                                f"'{_SYSTEM_DEFAULT_COMPUTE_POLICY}' "\
                                f"on VM '{vm_resource.get('name')}'"
                            task.update(
                                status=vcd_client.TaskStatus.RUNNING.value,
                                namespace='vcloud.cse',
                                operation=operation_msg,
                                operation_name=
                                f'Setting sizing policy to {_SYSTEM_DEFAULT_COMPUTE_POLICY}',  # noqa: E501
                                details='',
                                progress=None,
                                owner_href=vdc.href,
                                owner_name=vdc.name,
                                owner_type=vcd_client.EntityType.VDC.value,
                                user_href=user_href,
                                user_name=user_name,
                                task_href=task_href,
                                org_href=org_href)
                            task_monitor.wait_for_success(_task)
                        _task = vm.remove_placement_policy()
                        operation_msg = "Removing placement policy on VM " \
                                        f"'{vm_resource.get('name')}'"
                        task.update(
                            status=vcd_client.TaskStatus.RUNNING.value,
                            namespace='vcloud.cse',
                            operation=operation_msg,
                            operation_name='Remove org VDC compute policy',
                            details='',
                            progress=None,
                            owner_href=vdc.href,
                            owner_name=vdc.name,
                            owner_type=vcd_client.EntityType.VDC.value,
                            user_href=user_href,
                            user_name=user_name,
                            task_href=task_href,
                            org_href=org_href)
                        task_monitor.wait_for_success(_task)
                    else:
                        # Sizing policy: switch the VM to the system default.
                        _task = vm.update_compute_policy(
                            compute_policy_href=system_default_href)
                        operation_msg = "Setting sizing policy to " \
                                        f"'{_SYSTEM_DEFAULT_COMPUTE_POLICY}' "\
                                        f"on VM '{vm_resource.get('name')}'"
                        task.update(
                            status=vcd_client.TaskStatus.RUNNING.value,
                            namespace='vcloud.cse',
                            operation=operation_msg,
                            operation_name='Remove org VDC compute policy',
                            details='',
                            progress=None,
                            owner_href=vdc.href,
                            owner_name=vdc.name,
                            owner_type=vcd_client.EntityType.VDC.value,
                            user_href=user_href,
                            user_name=user_name,
                            task_href=task_href,
                            org_href=org_href)
                        task_monitor.wait_for_success(_task)

            # An umbrella task is left RUNNING for its owner to finish; a
            # task created here is marked SUCCESS.
            # NOTE(review): this status is posted before
            # vdc.remove_compute_policy() runs, so a self-created task is
            # reported SUCCESS first and switched to ERROR afterwards if the
            # removal fails - confirm this ordering is intended.
            final_status = vcd_client.TaskStatus.RUNNING.value \
                if is_umbrella_task else vcd_client.TaskStatus.SUCCESS.value
            task.update(status=final_status,
                        namespace='vcloud.cse',
                        operation=f"Removing compute policy (href:"
                        f"{compute_policy_href}) from org VDC '{vdc.name}'",
                        operation_name='Remove org VDC compute policy',
                        details='',
                        progress=None,
                        owner_href=vdc.href,
                        owner_name=vdc.name,
                        owner_type=vcd_client.EntityType.VDC.value,
                        user_href=user_href,
                        user_name=user_name,
                        task_href=task_href,
                        org_href=org_href)

            vdc.remove_compute_policy(compute_policy_href)
        except Exception as err:
            logger.SERVER_LOGGER.error(err, exc_info=True)
            # Set task to error if not an umbrella task
            if not is_umbrella_task:
                msg = 'Failed to remove compute policy: ' \
                      f'{compute_policy_href} from the OVDC: {vdc.name}'
                task.update(status=vcd_client.TaskStatus.ERROR.value,
                            namespace='vcloud.cse',
                            operation=msg,
                            operation_name='Remove org VDC compute policy',
                            details='',
                            progress=None,
                            owner_href=vdc.href,
                            owner_name=vdc.name,
                            owner_type=vcd_client.EntityType.VDC.value,
                            user_href=user_href,
                            user_name=self._session.get('user'),
                            task_href=task_href,
                            org_href=org_href,
                            error_message=f"{err}",
                            stack_trace='')
            # Always propagate so the caller (e.g. the umbrella task owner)
            # sees the failure.
            raise err
コード例 #18
0
class DefaultBroker(AbstractBroker, threading.Thread):
    def __init__(self, headers, request_body):
        threading.Thread.__init__(self)
        self.headers = headers
        self.body = request_body

        self.tenant_client = None
        self.client_session = None
        self.tenant_info = None

        self.sys_admin_client = None

        self.task = None
        self.task_resource = None
        self.op = None
        self.cluster_name = None
        self.cluster_id = None
        self.daemon = False

    def get_tenant_client_session(self):
        if self.client_session is None:
            self._connect_tenant()
        return self.client_session

    def _connect_tenant(self):
        """Log in as the tenant using the request headers.

        Stores the resulting client and session on the instance and derives
        ``self.tenant_info`` (user name, user id, org name, org href) from
        them.
        """
        vcd_config = get_server_runtime_config()['vcd']
        client, session = connect_vcd_user_via_token(
            vcd_uri=vcd_config['host'],
            headers=self.headers,
            verify_ssl_certs=vcd_config['verify'])
        self.tenant_client, self.client_session = client, session
        self.tenant_info = {
            'user_name': session.get('user'),
            'user_id': session.get('userId'),
            'org_name': session.get('org'),
            'org_href': client._get_wk_endpoint(
                _WellKnownEndpoint.LOGGED_IN_ORG),
        }

    def _connect_sys_admin(self):
        """Obtain a system administrator client and keep it on the instance."""
        admin_client = get_vcd_sys_admin_client()
        self.sys_admin_client = admin_client

    def _disconnect_sys_admin(self):
        if self.sys_admin_client is not None:
            self.sys_admin_client.logout()
            self.sys_admin_client = None

    def _to_message(self, e):
        if hasattr(e, 'message'):
            return {'message': e.message}
        else:
            return {'message': str(e)}

    def update_task(self,
                    status,
                    message=None,
                    error_message=None,
                    stack_trace=''):
        if not self.tenant_client.is_sysadmin():
            stack_trace = ''

        if self.task is None:
            self.task = Task(self.sys_admin_client)

        if message is None:
            message = OP_MESSAGE[self.op]

        if self.task_resource is not None:
            task_href = self.task_resource.get('href')
        else:
            task_href = None

        self.task_resource = self.task.update(
            status=status.value,
            namespace='vcloud.cse',
            operation=message,
            operation_name=self.op,
            details='',
            progress=None,
            owner_href='urn:cse:cluster:%s' % self.cluster_id,
            owner_name=self.cluster_name,
            owner_type='application/vcloud.cse.cluster+xml',
            user_href=self.tenant_info['user_id'],
            user_name=self.tenant_info['user_name'],
            org_href=self.tenant_info['org_href'],
            task_href=task_href,
            error_message=error_message,
            stack_trace=stack_trace)

    def is_valid_name(self, name):
        """Validate the cluster name against the host-name pattern.

        A name is valid when it is non-empty, no longer than
        ``MAX_HOST_NAME_LENGTH``, and every dot-separated label consists of
        1-63 letters, digits or hyphens and neither starts nor ends with a
        hyphen. A single trailing dot is ignored.

        :param name: candidate cluster name
        :return: True if the name is valid, False otherwise
        """
        # An empty (or missing) name is simply invalid; without this guard
        # the trailing-dot check below raises IndexError on ''.
        if not name:
            return False
        if len(name) > MAX_HOST_NAME_LENGTH:
            return False
        if name[-1] == '.':
            name = name[:-1]
        # Raw string: '\d' is not a valid escape in a plain string literal.
        # fullmatch() is used instead of match() with '$', because '$' also
        # matches just before a trailing newline and would accept 'abc\n'.
        allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?<!-)", re.IGNORECASE)
        return all(allowed.fullmatch(x) for x in name.split("."))

    def get_template(self, name=None):
        """Look up a template definition by name in the server config.

        When ``name`` is None, the request body's ``template`` entry is used
        if present and not None; otherwise the configured default template
        name is used.

        :param name: template name, or None to resolve it as described
        :return: the matching entry of the configured template list
        :raises Exception: if no configured template has that name
        """
        broker_config = get_server_runtime_config()['broker']
        if name is None:
            requested = \
                self.body['template'] if 'template' in self.body else None
            if requested is not None:
                name = requested
            else:
                name = broker_config['default_template']
        found = next(
            (tpl for tpl in broker_config['templates'] if tpl['name'] == name),
            None)
        if found is None:
            raise Exception('Template %s not found' % name)
        return found

    def run(self):
        """Thread entry point: run the worker method matching ``self.op``.

        Does nothing beyond logging when ``self.op`` matches no known
        operation.
        """
        LOGGER.debug('thread started op=%s' % self.op)
        # Operation constant -> name of the method that carries it out.
        # Methods are resolved lazily, only for the operation that matches.
        handlers = (
            (OP_CREATE_CLUSTER, 'create_cluster_thread'),
            (OP_DELETE_CLUSTER, 'delete_cluster_thread'),
            (OP_CREATE_NODES, 'create_nodes_thread'),
            (OP_DELETE_NODES, 'delete_nodes_thread'),
        )
        for operation, method_name in handlers:
            if self.op == operation:
                getattr(self, method_name)()
                break

    @exception_handler
    def list_clusters(self):
        """Return the clusters loaded from metadata for the tenant.

        :return: (dict): ``body`` holds the cluster list, ``status_code``
            is ``OK``.
        """
        self._connect_tenant()
        return {
            'body': load_from_metadata(self.tenant_client),
            'status_code': OK,
        }

    @exception_handler
    def get_cluster_info(self, name):
        """Get the info of the cluster, including its nodes.

        Every VM of the cluster vApp is added to the ``master_nodes``,
        ``nodes`` or ``nfs_nodes`` list of the cluster record according to
        the prefix of its name. A VM whose primary IP cannot be read is
        listed with an empty ``ipAddress``.

        :param name: (str): Name of the cluster

        :return: (dict): ``body`` holds the cluster info, ``status_code``
            is ``OK``.

        :raises CseServerError: if no cluster with that name is found.
        """
        self._connect_tenant()
        matches = load_from_metadata(self.tenant_client, name=name)
        if not matches:
            raise CseServerError("Cluster '%s' not found." % name)

        cluster = matches[0]
        vapp = VApp(self.tenant_client, href=cluster['vapp_href'])
        # VM name prefix -> key of the node list in the cluster record.
        buckets = (
            (TYPE_MASTER, 'master_nodes'),
            (TYPE_NODE, 'nodes'),
            (TYPE_NFS, 'nfs_nodes'),
        )
        for vm in vapp.get_all_vms():
            vm_name = vm.get('name')
            node_info = {'name': vm_name, 'ipAddress': ''}
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm_name)
            except Exception:
                LOGGER.debug('cannot get ip address for node %s' % vm_name)
            for prefix, key in buckets:
                if vm_name.startswith(prefix):
                    cluster.get(key).append(node_info)
                    break
        return {'body': cluster, 'status_code': OK}

    @exception_handler
    def get_node_info(self, cluster_name, node_name):
        """Get the info of a given node in the cluster.

        All VMs of the cluster vApp are scanned; if several share the
        requested name, the last one scanned is reported.

        :param cluster_name: (str): Name of the cluster
        :param node_name: (str): Name of the node

        :return: (dict): ``body`` holds the node info, ``status_code``
            is ``OK``.

        :raises CseServerError: if the cluster or the node is not found.
        """
        self._connect_tenant()
        matches = load_from_metadata(self.tenant_client, name=cluster_name)
        if not matches:
            raise CseServerError(f"Cluster \'{cluster_name}\' not found.")

        vapp = VApp(self.tenant_client, href=matches[0]['vapp_href'])
        node_info = None
        for vm in vapp.get_all_vms():
            vm_name = vm.get('name')
            if not (node_name == vm_name):
                continue
            node_info = {
                'name': vm_name,
                'numberOfCpus': '',
                'memoryMB': '',
                'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
                'ipAddress': ''
            }
            if hasattr(vm, 'VmSpecSection'):
                spec = vm.VmSpecSection
                node_info['numberOfCpus'] = spec.NumCpus.text
                node_info['memoryMB'] = spec.MemoryResourceMb.Configured.text
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm_name)
            except Exception:
                LOGGER.debug('cannot get ip address '
                             'for node %s' % vm_name)
            if vm_name.startswith(TYPE_MASTER):
                node_info['node_type'] = 'master'
            elif vm_name.startswith(TYPE_NODE):
                node_info['node_type'] = 'node'
            elif vm_name.startswith(TYPE_NFS):
                node_info['node_type'] = 'nfsd'
                # NFS nodes additionally report their export list.
                node_info['exports'] = self._get_nfs_exports(
                    node_info['ipAddress'], vapp, vm)

        if node_info is None:
            raise CseServerError("Node '%s' not found in cluster '%s'" %
                                 (node_name, cluster_name))
        return {'body': node_info, 'status_code': OK}

    def _get_nfs_exports(self, ip, vapp, node):
        """Get the exports from remote NFS server (helper method).

        Runs ``showmount -e <ip>`` on the given node and returns the first
        whitespace-separated field of every output line except the first
        and the last.

        :param ip: (str): IP address of the NFS server
        :param vapp: (pyvcloud.vcd.vapp.VApp): The vApp or cluster
         to which node belongs
        :param node: (`lxml.objectify.StringElement`) object
        representing the vm resource.

        :return: (List): List of exports
        """
        # TODO(right template) find a right way to retrieve
        # the template from which nfs node was created.
        server_config = get_server_runtime_config()
        first_template = server_config['broker']['templates'][0]
        script = '#!/usr/bin/env bash\nshowmount -e %s' % ip
        result = execute_script_in_nodes(server_config,
                                         vapp,
                                         first_template['admin_password'],
                                         script,
                                         nodes=[node],
                                         check_tools=False)
        output_lines = result[0][1].content.decode().split('\n')
        # Drop the first line and the final line of the output.
        return [line.strip().split()[0] for line in output_lines[1:-1]]

    @exception_handler
    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_cluster(self):
        """Validate the request and start cluster creation on this thread.

        Reads ``name``, ``vdc``, ``node_count`` and ``storage_profile`` from
        the request body, connects as tenant and as sysadmin, assigns a new
        cluster id, creates the tracking task and starts the thread as a
        daemon.

        :return: (dict): ``body`` holds the cluster name, cluster id and
            task href; ``status_code`` is ``ACCEPTED``.

        :raises CseServerError: if the cluster name is invalid.
        """
        body = self.body
        cluster_name = body['name']
        vdc_name = body['vdc']
        node_count = body['node_count']
        LOGGER.debug('About to create cluster %s on %s with %s nodes, sp=%s',
                     cluster_name, vdc_name, node_count,
                     body['storage_profile'])
        # Interim body; it is replaced below before the result is returned.
        result = {
            'body': {
                'message': 'can\'t create cluster \'%s\'' % cluster_name
            }
        }

        if not self.is_valid_name(cluster_name):
            raise CseServerError(f"Invalid cluster name \'{cluster_name}\'")

        self._connect_tenant()
        self._connect_sys_admin()
        self.cluster_name = cluster_name
        self.cluster_id = str(uuid.uuid4())
        self.op = OP_CREATE_CLUSTER
        self.update_task(TaskStatus.RUNNING,
                         message='Creating cluster %s(%s)' %
                         (cluster_name, self.cluster_id))

        # The remaining work happens in run() on this thread.
        self.daemon = True
        self.start()

        result['body'] = {
            'name': self.cluster_name,
            'cluster_id': self.cluster_id,
            'task_href': self.task_resource.get('href'),
        }
        result['status_code'] = ACCEPTED
        return result

    @rollback
    def create_cluster_thread(self):
        """Create the cluster described by the request body.

        Called from ``run()`` when ``self.op`` is ``OP_CREATE_CLUSTER``.
        Steps, each reported through ``update_task``:

        1. fail if a cluster with this name already exists;
        2. create the cluster vApp on the requested VDC and network;
        3. tag the vApp with the cluster id, CSE version and template name;
        4. add one master node, initialize the cluster and record the
           master IP in the vApp metadata;
        5. if ``node_count`` > 0, add the worker nodes and join them;
        6. if ``enable_nfs`` is set, add one NFS node;
        7. mark the task SUCCESS.

        Error handling: node-creation, joining, initialization and
        ``ClusterOperationError`` failures set the task to ERROR and are
        re-raised; any other exception sets the task to ERROR and is not
        re-raised. The sysadmin client is always disconnected at the end.

        NOTE(review): presumably the ``@rollback`` decorator acts on the
        re-raised exceptions - confirm against its definition.
        """
        network_name = self.body['network']
        try:
            clusters = load_from_metadata(self.tenant_client,
                                          name=self.cluster_name)
            # NOTE(review): ClusterAlreadyExistsError is not in the re-raise
            # tuple below, so it is handled by the generic branch (task set
            # to ERROR, exception not propagated).
            if len(clusters) != 0:
                raise ClusterAlreadyExistsError(f"Cluster {self.cluster_name} "
                                                "already exists.")
            org_resource = self.tenant_client.get_org()
            org = Org(self.tenant_client, resource=org_resource)
            vdc_resource = org.get_vdc(self.body['vdc'])
            vdc = VDC(self.tenant_client, resource=vdc_resource)
            template = self.get_template()
            self.update_task(TaskStatus.RUNNING,
                             message='Creating cluster vApp %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            try:
                vapp_resource = vdc.create_vapp(self.cluster_name,
                                                description='cluster %s' %
                                                self.cluster_name,
                                                network=network_name,
                                                fence_mode='bridged')
            except Exception as e:
                # Wrap any vApp creation failure in a type that is re-raised
                # by the handler below.
                raise ClusterOperationError('Error while creating vApp:',
                                            str(e))

            # Wait for the vApp creation task before using the vApp.
            self.tenant_client.get_task_monitor().wait_for_status(
                vapp_resource.Tasks.Task[0])
            # Metadata written to the vApp, one entry at a time.
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
            for k, v in tags.items():
                task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.tenant_client.get_task_monitor().wait_for_status(task)
            self.update_task(TaskStatus.RUNNING,
                             message='Creating master node for %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()

            server_config = get_server_runtime_config()
            try:
                add_nodes(1, template, TYPE_MASTER, server_config,
                          self.tenant_client, org, vdc, vapp, self.body)
            except Exception as e:
                raise MasterNodeCreationError(
                    "Error while adding master node:", str(e))

            self.update_task(TaskStatus.RUNNING,
                             message='Initializing cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
            vapp.reload()
            init_cluster(server_config, vapp, template)
            # Record the master IP in the vApp metadata.
            master_ip = get_master_ip(server_config, vapp, template)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.tenant_client.get_task_monitor().wait_for_status(task)
            if self.body['node_count'] > 0:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating %s node(s) for %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                try:
                    add_nodes(self.body['node_count'], template, TYPE_NODE,
                              server_config, self.tenant_client, org, vdc,
                              vapp, self.body)
                except Exception as e:
                    raise WorkerNodeCreationError(
                        "Error while creating worker node:", str(e))

                self.update_task(TaskStatus.RUNNING,
                                 message='Adding %s node(s) to %s(%s)' %
                                 (self.body['node_count'], self.cluster_name,
                                  self.cluster_id))
                vapp.reload()
                join_cluster(server_config, vapp, template)
            if self.body['enable_nfs']:
                self.update_task(TaskStatus.RUNNING,
                                 message='Creating NFS node for %s(%s)' %
                                 (self.cluster_name, self.cluster_id))
                try:
                    add_nodes(1, template, TYPE_NFS, server_config,
                              self.tenant_client, org, vdc, vapp, self.body)
                except Exception as e:
                    raise NFSNodeCreationError(
                        "Error while creating NFS node:", str(e))

            self.update_task(TaskStatus.SUCCESS,
                             message='Created cluster %s(%s)' %
                             (self.cluster_name, self.cluster_id))
        except (MasterNodeCreationError, WorkerNodeCreationError,
                NFSNodeCreationError, ClusterJoiningError,
                ClusterInitializationError, ClusterOperationError) as e:
            # These failures are reported on the task and then propagated.
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE][ERROR_STACKTRACE])
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION],
                stack_trace=stack_trace)
            raise e
        except Exception as e:
            # Any other failure is reported on the task but not propagated.
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE][ERROR_STACKTRACE])
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION],
                stack_trace=stack_trace)
        finally:
            # The sysadmin session was opened by create_cluster().
            self._disconnect_sys_admin()

    @exception_handler
    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_cluster(self):
        """Kick off deletion of the cluster named in the request body.

        Looks the cluster up by ``self.body['name']``, records a RUNNING
        task and calls ``self.start()`` (presumably launching the worker
        thread that runs ``delete_cluster_thread`` -- confirm against
        ``run``).

        :return: dict with ``body`` (cluster name and task href) and
            ``status_code`` set to ACCEPTED.

        :rtype: dict

        :raises CseServerError: if the lookup does not yield exactly one
            cluster.
        """
        LOGGER.debug(f"About to delete cluster with name: {self.body['name']}")

        self.cluster_name = self.body['name']
        self._connect_tenant()
        self._connect_sys_admin()
        self.op = OP_DELETE_CLUSTER

        matches = load_from_metadata(self.tenant_client,
                                     name=self.cluster_name)
        if len(matches) != 1:
            raise CseServerError(f"Cluster {self.cluster_name} not found.")
        self.cluster = matches[0]
        self.cluster_id = self.cluster['cluster_id']

        progress = 'Deleting cluster %s(%s)' % (self.cluster_name,
                                                self.cluster_id)
        self.update_task(TaskStatus.RUNNING, message=progress)

        # The remaining work is handed off via start().
        self.daemon = True
        self.start()

        return {
            'body': {
                'cluster_name': self.cluster_name,
                'task_href': self.task_resource.get('href'),
            },
            'status_code': ACCEPTED,
        }

    def delete_cluster_thread(self):
        """Delete the cluster's vApp and record the outcome on the task.

        Any exception is logged and reported as an ERROR task status
        instead of being propagated. The sys admin connection is released
        in all cases.
        """
        LOGGER.debug('About to delete cluster with name: %s',
                     self.cluster_name)
        try:
            cluster_vdc = VDC(self.tenant_client,
                              href=self.cluster['vdc_href'])
            removal = cluster_vdc.delete_vapp(self.cluster['name'],
                                              force=True)
            monitor = self.tenant_client.get_task_monitor()
            monitor.wait_for_status(removal)
            done = 'Deleted cluster %s(%s)' % (self.cluster_name,
                                               self.cluster_id)
            self.update_task(TaskStatus.SUCCESS, message=done)
        except Exception as err:
            LOGGER.error(traceback.format_exc())
            self.update_task(TaskStatus.ERROR, error_message=str(err))
        finally:
            self._disconnect_sys_admin()

    @exception_handler
    def get_cluster_config(self, cluster_name):
        """Fetch the configuration of the named cluster.

        :param cluster_name: name used to look the cluster up in metadata.

        :return: dict with ``body`` (the value returned by the module-level
            ``get_cluster_config`` helper) and ``status_code`` set to OK.

        :rtype: dict

        :raises CseServerError: if the lookup does not yield exactly one
            cluster.
        """
        self._connect_tenant()
        found = load_from_metadata(self.tenant_client, name=cluster_name)
        if len(found) != 1:
            raise CseServerError("Cluster '%s' not found" % cluster_name)

        cluster = found[0]
        vapp = VApp(self.tenant_client, href=cluster['vapp_href'])
        template = self.get_template(name=cluster['template'])
        # Inside the method body this name resolves to the module-level
        # helper, not to this method.
        config = get_cluster_config(get_server_runtime_config(), vapp,
                                    template['admin_password'])
        return {'body': config, 'status_code': OK}

    @exception_handler
    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_nodes(self):
        """Start addition of nodes to an existing cluster.

        Validates the requested node count, looks the cluster up by
        ``self.body['name']``, records a RUNNING task and calls
        ``self.start()`` to continue the work.

        :return: dict with ``body`` (cluster name and task href) and
            ``status_code`` set to ACCEPTED.

        :rtype: dict

        :raises CseServerError: if the node count is below 1 or the lookup
            does not yield exactly one cluster.
        """
        result = {'body': {}}
        self.cluster_name = self.body['name']
        # Each fragment of an implicitly concatenated string needs its own
        # f prefix. .get() is used so that interpolating these values cannot
        # fail the request if the keys are absent from the body.
        LOGGER.debug(f"About to add {self.body['node_count']} nodes to cluster"
                     f" {self.cluster_name} on VDC {self.body.get('vdc')}, "
                     f"sp={self.body.get('storage_profile')}")
        if self.body['node_count'] < 1:
            raise CseServerError(f"Invalid node count: "
                                 f"{self.body['node_count']}.")
        self._connect_tenant()
        self._connect_sys_admin()
        clusters = load_from_metadata(self.tenant_client,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            raise CseServerError('Cluster \'%s\' not found.' %
                                 self.cluster_name)
        self.cluster = clusters[0]
        self.op = OP_CREATE_NODES
        self.cluster_id = self.cluster['cluster_id']
        self.update_task(
            TaskStatus.RUNNING,
            message=f"Adding {self.body['node_count']} node(s) to cluster "
                    f"{self.cluster_name}({self.cluster_id})")
        self.daemon = True
        self.start()
        response_body = {}
        response_body['cluster_name'] = self.cluster_name
        response_body['task_href'] = self.task_resource.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    @rollback
    def create_nodes_thread(self):
        """Create the requested nodes and, for worker nodes, join them.

        Progress and the final outcome are recorded through
        ``update_task``. A ``NodeCreationError`` is reported and re-raised;
        any other exception is reported and swallowed. The sys admin
        connection is released in all cases.
        """
        LOGGER.debug('About to add nodes to cluster with name: %s',
                     self.cluster_name)

        def report(status, text):
            # All progress messages interpolate the same three values, read
            # at call time.
            self.update_task(status,
                             message=text % (self.body['node_count'],
                                             self.cluster_name,
                                             self.cluster_id))

        def report_failure(exc):
            # Must run while the exception is being handled so that
            # format_exc() sees it.
            error_obj = error_to_json(exc)
            LOGGER.error(traceback.format_exc())
            stack_trace = ''.join(error_obj[ERROR_MESSAGE][ERROR_STACKTRACE])
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION],
                stack_trace=stack_trace)

        try:
            server_config = get_server_runtime_config()
            org = Org(self.tenant_client,
                      resource=self.tenant_client.get_org())
            vdc = VDC(self.tenant_client, href=self.cluster['vdc_href'])
            vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
            template = self.get_template()

            report(TaskStatus.RUNNING, 'Creating %s node(s) for %s(%s)')
            created = add_nodes(self.body['node_count'], template,
                                self.body['node_type'], server_config,
                                self.tenant_client, org, vdc, vapp,
                                self.body)

            node_type = self.body['node_type']
            if node_type == TYPE_NFS:
                report(TaskStatus.SUCCESS, 'Created %s node(s) for %s(%s)')
            elif node_type == TYPE_NODE:
                report(TaskStatus.RUNNING, 'Adding %s node(s) to %s(%s)')
                target_nodes = [spec['target_vm_name']
                                for spec in created['specs']]
                vapp.reload()
                join_cluster(server_config, vapp, template, target_nodes)
                report(TaskStatus.SUCCESS,
                       'Added %s node(s) to cluster %s(%s)')
        except NodeCreationError as err:
            report_failure(err)
            # Propagated to the caller -- presumably so the @rollback
            # decorator can act on it; confirm against its definition.
            raise
        except Exception as err:
            report_failure(err)
        finally:
            self._disconnect_sys_admin()

    @exception_handler
    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_nodes(self):
        """Start deletion of nodes from an existing cluster.

        Rejects an empty node list and any node whose name starts with
        TYPE_MASTER, looks the cluster up by ``self.body['name']``, records
        a RUNNING task and calls ``self.start()`` to continue the work.

        :return: dict with ``body`` (cluster name and task href) and
            ``status_code`` set to ACCEPTED.

        :rtype: dict

        :raises CseServerError: if the node list is empty, contains a
            master node, or the lookup does not yield exactly one cluster.
        """
        result = {'body': {}}
        self.cluster_name = self.body['name']
        # Both fragments carry the f prefix so the name is interpolated.
        LOGGER.debug(f"About to delete nodes from cluster with name: "
                     f"{self.body['name']}")

        if len(self.body['nodes']) < 1:
            raise CseServerError(f"Invalid list of nodes: "
                                 f"{self.body['nodes']}.")
        for node in self.body['nodes']:
            if node.startswith(TYPE_MASTER):
                raise CseServerError('Can\'t delete a master node: \'%s\'.' %
                                     node)
        self._connect_tenant()
        self._connect_sys_admin()
        clusters = load_from_metadata(self.tenant_client,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            raise CseServerError('Cluster \'%s\' not found.' %
                                 self.cluster_name)
        self.cluster = clusters[0]
        self.op = OP_DELETE_NODES
        self.cluster_id = self.cluster['cluster_id']
        self.update_task(
            TaskStatus.RUNNING,
            message=f"Deleting {len(self.body['nodes'])} node(s) from cluster "
                    f"{self.cluster_name}({self.cluster_id})")
        self.daemon = True
        self.start()
        response_body = {}
        response_body['cluster_name'] = self.cluster_name
        response_body['task_href'] = self.task_resource.get('href')
        result['body'] = response_body
        result['status_code'] = ACCEPTED
        return result

    def delete_nodes_thread(self):
        """Remove the requested nodes from the cluster and delete their VMs.

        Steps: remove the nodes from the cluster (best effort), undeploy
        each VM (best effort), then delete the VMs from the vApp. Progress
        and outcome are recorded through ``update_task``. Failures outside
        the best-effort steps are logged and reported as an ERROR task
        status, not propagated. The sys admin connection is released in all
        cases.
        """
        LOGGER.debug('About to delete nodes from cluster with name: %s',
                     self.cluster_name)
        try:
            vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
            template = self.get_template()
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s node(s) from %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
            try:
                server_config = get_server_runtime_config()
                delete_nodes_from_cluster(server_config, vapp, template,
                                          self.body['nodes'],
                                          self.body['force'])
            except Exception:
                # Best effort: the VMs are still undeployed and deleted
                # below. Both fragments need the f prefix to interpolate.
                LOGGER.error(f"Couldn't delete node {self.body['nodes']} from "
                             f"cluster:{self.cluster_name}")
            self.update_task(
                TaskStatus.RUNNING,
                message='Undeploying %s node(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
            for vm_name in self.body['nodes']:
                vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name))
                try:
                    task = vm.undeploy()
                    self.tenant_client.get_task_monitor().wait_for_status(task)
                except Exception:
                    LOGGER.warning('couldn\'t undeploy VM %s' % vm_name)
            self.update_task(
                TaskStatus.RUNNING,
                message='Deleting %s VM(s) for %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
            task = vapp.delete_vms(self.body['nodes'])
            self.tenant_client.get_task_monitor().wait_for_status(task)
            # Nodes are removed *from* the cluster; the old text said "to".
            self.update_task(
                TaskStatus.SUCCESS,
                message='Deleted %s node(s) from cluster %s(%s)' %
                (len(self.body['nodes']), self.cluster_name, self.cluster_id))
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE][ERROR_STACKTRACE])
            self.update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE][ERROR_DESCRIPTION],
                stack_trace=stack_trace)
        finally:
            self._disconnect_sys_admin()

    @exception_handler
    def enable_ovdc_for_kubernetes(self):
        """Set container-provider metadata on an ovdc for k8s deployment.

        The request is only honoured when the tenant client is a system
        administrator.

        :return: dict with ``body`` (ovdc name and task href) and
            ``status_code`` set to ACCEPTED.

        :rtype: dict

        :raises CseServerError: if the user is not system administrator.
        """
        self._connect_tenant()
        if not self.tenant_client.is_sysadmin():
            raise CseServerError("Unauthorized Operation")

        cache = OvdcCache(self.tenant_client)
        metadata_task = cache.set_ovdc_container_provider_metadata(
            self.body['ovdc_name'],
            ovdc_id=self.body.get('ovdc_id', None),
            container_provider=self.body.get('container_provider', None),
            pks_plans=self.body['pks_plans'],
            org_name=self.body.get('org_name', None))

        return {
            'body': {
                'ovdc_name': self.body['ovdc_name'],
                'task_href': metadata_task.get('href'),
            },
            'status_code': ACCEPTED,
        }

    @exception_handler
    def ovdc_info_for_kubernetes(self):
        """Return the container-provider metadata stored on an ovdc.

        The ``username`` and ``secret`` entries are stripped from the
        metadata before it is returned.

        :return: dict with ``status_code`` set to OK and ``body`` holding
            the remaining metadata.

        :rtype: dict

        :raises CseServerError: if the user is not system administrator.
        """
        self._connect_tenant()
        if not self.tenant_client.is_sysadmin():
            raise CseServerError("Unauthorized Operation")

        cache = OvdcCache(self.tenant_client)
        metadata = cache.get_ovdc_container_provider_metadata(
            self.body.get('ovdc_name', None),
            ovdc_id=self.body.get('ovdc_id', None),
            org_name=self.body.get('org_name', None))

        # Credentials must never be sent back to the client.
        for sensitive_key in ('username', 'secret'):
            metadata.pop(sensitive_key, None)

        return {'status_code': OK, 'body': metadata}

    def node_rollback(self, node_list):
        """Rollback for node creation failure.

        Removes the nodes from the cluster (best effort), undeploys each VM
        (best effort) and then deletes the VMs from the vApp.

        :param list node_list: faulty nodes to be deleted
        """
        # Every fragment of an implicitly concatenated string needs its own
        # f prefix, otherwise the placeholder text is logged verbatim.
        LOGGER.info(f"About to rollback nodes from cluster with name: "
                    f"{self.cluster_name}")
        LOGGER.info(f"Node list to be deleted:{node_list}")
        vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
        template = self.get_template()
        try:
            server_config = get_server_runtime_config()
            delete_nodes_from_cluster(server_config,
                                      vapp,
                                      template,
                                      node_list,
                                      force=True)
        except Exception:
            # Best effort: continue with VM cleanup below.
            LOGGER.warning(f"Couldn't delete node {node_list} from cluster:"
                           f"{self.cluster_name}")
        for vm_name in node_list:
            vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name))
            try:
                vm.undeploy()
            except Exception:
                LOGGER.warning(f"Couldn't undeploy VM {vm_name}")
        vapp.delete_vms(node_list)
        LOGGER.info(f"Successfully deleted nodes: {node_list}")

    def cluster_rollback(self):
        """Rollback for cluster creation failure.

        Looks the cluster up by ``self.cluster_name`` and force-deletes its
        vApp. Returns silently if the lookup does not yield exactly one
        cluster.
        """
        # The second fragment needs its own f prefix to be interpolated.
        LOGGER.info(f"About to rollback cluster with name: "
                    f"{self.cluster_name}")
        self._connect_tenant()
        clusters = load_from_metadata(self.tenant_client,
                                      name=self.cluster_name)
        if len(clusters) != 1:
            LOGGER.debug('Cluster %s not found.' % self.cluster_name)
            return
        self.cluster = clusters[0]
        vdc = VDC(self.tenant_client, href=self.cluster['vdc_href'])
        vdc.delete_vapp(self.cluster['name'], force=True)
        LOGGER.info(f"Successfully deleted cluster: {self.cluster_name}")
コード例 #19
0
class VcdBroker(AbstractBroker, threading.Thread):
    def __init__(self, tenant_auth_token, request_spec):
        """Initialise the broker and its thread state.

        :param tenant_auth_token: forwarded to the parent initializer.
        :param request_spec: forwarded to the parent initializer and kept
            as ``self.req_spec``.
        """
        super().__init__(tenant_auth_token, request_spec)
        threading.Thread.__init__(self)
        self.req_spec = request_spec

        # Connection-related attributes start out unset.
        self.tenant_client = self.client_session = self.tenant_info = None
        self.sys_admin_client = None

        # Task tracking and the operation this instance will carry out.
        self.task = self.task_resource = None
        self.op = None

        # Identity of the cluster being operated on.
        self.cluster_name = self.cluster_id = None

        # Assigned after Thread.__init__ has run.
        self.daemon = False

    def _connect_sys_admin(self):
        """Obtain a sys admin client and keep it on the instance."""
        admin_client = get_sys_admin_client()
        self.sys_admin_client = admin_client

    def _disconnect_sys_admin(self):
        """Log out the sys admin client, if any, and forget it."""
        if self.sys_admin_client is None:
            return
        self.sys_admin_client.logout()
        self.sys_admin_client = None

    def _update_task(self,
                     status,
                     message=None,
                     error_message=None,
                     stack_trace=''):
        """Create or update the task that tracks the current operation.

        The result of ``Task.update`` is stored in ``self.task_resource``;
        its href is passed to the next update.

        :param status: object whose ``value`` is sent as the task status.
        :param message: operation text; defaults to ``OP_MESSAGE[self.op]``.
        :param error_message: forwarded unchanged to ``Task.update``.
        :param stack_trace: forwarded only when the tenant client is a
            system administrator; otherwise replaced by ''.
        """
        # Stack traces are withheld from non-sysadmin users.
        if not self.tenant_client.is_sysadmin():
            stack_trace = ''

        if self.task is None:
            self.task = Task(self.sys_admin_client)

        if message is None:
            message = OP_MESSAGE[self.op]

        # No href yet means this is the first update for the operation.
        existing = self.task_resource
        task_href = existing.get('href') if existing is not None else None

        update_args = dict(
            status=status.value,
            namespace='vcloud.cse',
            operation=message,
            operation_name=self.op,
            details='',
            progress=None,
            owner_href=f"urn:cse:cluster:{self.cluster_id}",
            owner_name=self.cluster_name,
            owner_type='application/vcloud.cse.cluster+xml',
            user_href=self.tenant_info['user_id'],
            user_name=self.tenant_info['user_name'],
            org_href=self.tenant_info['org_href'],
            task_href=task_href,
            error_message=error_message,
            stack_trace=stack_trace,
        )
        self.task_resource = self.task.update(**update_args)

    def _is_valid_name(self, name):
        """Check the cluster name against the host name pattern.

        A name is accepted when it is non-empty, no longer than
        MAX_HOST_NAME_LENGTH, and every dot-separated label is 1-63
        characters of letters, digits or hyphens that neither starts nor
        ends with a hyphen. A single trailing dot is tolerated.

        :param str name: candidate cluster name.

        :return: True if the name is valid, False otherwise.

        :rtype: bool
        """
        if not name:
            # Indexing name[-1] below would raise IndexError on ''.
            return False
        if len(name) > MAX_HOST_NAME_LENGTH:
            return False
        if name[-1] == '.':
            name = name[:-1]
        allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?<!-)$", re.IGNORECASE)
        # fullmatch: with match(), '$' also matches just before a trailing
        # newline, which would let a label such as 'abc\n' through.
        return all(allowed.fullmatch(x) for x in name.split("."))

    def _get_template(self, name=None):
        """Return the configured template with the given name.

        The name falls back first to the request spec's template name and
        then to the broker's default template.

        :param name: template name; any falsy value triggers the fallback.

        :return: the matching entry of ``server_config['broker']['templates']``.

        :raises Exception: if no configured template has that name.
        """
        server_config = get_server_runtime_config()
        if not name:
            name = self.req_spec.get(RequestKey.TEMPLATE_NAME)
        if not name:
            name = server_config['broker']['default_template']

        candidates = (entry
                      for entry in server_config['broker']['templates']
                      if entry['name'] == name)
        found = next(candidates, None)
        if found is None:
            raise Exception(f"Template {name} not found.")
        return found

    def _get_nfs_exports(self, ip, vapp, node):
        """Get the exports from remote NFS server (helper method).

        Runs ``showmount -e <ip>`` on the node and parses its output.

        :param ip: (str): IP address of the NFS server
        :param vapp: (pyvcloud.vcd.vapp.VApp): The vApp or cluster
         to which node belongs
        :param node: (`lxml.objectify.StringElement`) object
        representing the vm resource.

        :return: (List): List of exports
        """
        # TODO(right template) find a right way to retrieve
        # the template from which nfs node was created.
        server_config = get_server_runtime_config()
        first_template = server_config['broker']['templates'][0]
        script = f"#!/usr/bin/env bash\nshowmount -e {ip}"
        result = execute_script_in_nodes(
            server_config, vapp, first_template['admin_password'],
            script, nodes=[node], check_tools=False)
        # NOTE(review): result[0][1].content is taken to be the script's
        # output for the single node -- confirm against
        # execute_script_in_nodes.
        output_lines = result[0][1].content.decode().split('\n')
        # The first and last lines are skipped; the first
        # whitespace-separated field of each remaining line is kept.
        return [line.strip().split()[0] for line in output_lines[1:-1]]

    def node_rollback(self, node_list):
        """Rollback for node creation failure.

        Removes the nodes from the cluster (best effort), undeploys each VM
        (best effort) and then deletes the VMs from the vApp.

        :param list node_list: faulty nodes to be deleted
        """
        # Every fragment of an implicitly concatenated string needs its own
        # f prefix, otherwise the placeholder text is logged verbatim.
        LOGGER.info(f"About to rollback nodes from cluster with name: "
                    f"{self.cluster_name}")
        LOGGER.info(f"Node list to be deleted:{node_list}")
        vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
        template = self._get_template()
        try:
            server_config = get_server_runtime_config()
            delete_nodes_from_cluster(server_config, vapp, template,
                                      node_list, force=True)
        except Exception:
            # Best effort: continue with VM cleanup below.
            LOGGER.warning(f"Couldn't delete node {node_list} from cluster:"
                           f"{self.cluster_name}")
        for vm_name in node_list:
            vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name))
            try:
                vm.undeploy()
            except Exception:
                LOGGER.warning(f"Couldn't undeploy VM {vm_name}")
        vapp.delete_vms(node_list)
        LOGGER.info(f"Successfully deleted nodes: {node_list}")

    def cluster_rollback(self):
        """Rollback for cluster creation failure.

        Looks the cluster up by ``self.cluster_name`` and force-deletes its
        vApp. Returns silently if the lookup does not yield exactly one
        cluster.
        """
        # The second fragment needs its own f prefix to be interpolated.
        LOGGER.info(f"About to rollback cluster with name: "
                    f"{self.cluster_name}")
        self._connect_tenant()
        clusters = load_from_metadata(
            self.tenant_client, name=self.cluster_name)
        if len(clusters) != 1:
            LOGGER.debug(f"Cluster {self.cluster_name} not found.")
            return
        self.cluster = clusters[0]
        vdc = VDC(self.tenant_client, href=self.cluster['vdc_href'])
        vdc.delete_vapp(self.cluster['name'], force=True)
        LOGGER.info(f"Successfully deleted cluster: {self.cluster_name}")

    def run(self):
        """Thread entry point: run the worker that matches ``self.op``.

        Nothing is executed when ``self.op`` matches none of the known
        operations.
        """
        LOGGER.debug(f"Thread started for operation={self.op}")
        workers = (
            (OP_CREATE_CLUSTER, self.create_cluster_thread),
            (OP_DELETE_CLUSTER, self.delete_cluster_thread),
            (OP_CREATE_NODES, self.create_nodes_thread),
            (OP_DELETE_NODES, self.delete_nodes_thread),
        )
        for operation, worker in workers:
            if self.op == operation:
                worker()
                break

    def list_clusters(self):
        """List the clusters visible for the request's org and ovdc.

        :return: one summary dict per cluster found in metadata.

        :rtype: list
        """
        self._connect_tenant()
        found = load_from_metadata(
            self.tenant_client,
            org_name=self.req_spec.get(RequestKey.ORG_NAME),
            vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))
        return [
            {
                'name': entry['name'],
                'IP master': entry['leader_endpoint'],
                'template': entry['template'],
                'VMs': entry['number_of_vms'],
                'vdc': entry['vdc_name'],
                'status': entry['status'],
                'vdc_id': entry['vdc_id'],
                'org_name': get_org_name_from_ovdc_id(entry['vdc_id']),
                K8S_PROVIDER_KEY: K8sProviders.NATIVE
            }
            for entry in found
        ]

    def get_cluster_info(self, cluster_name):
        """Get the info of the cluster.

        Each VM of the cluster's vApp is appended to the cluster's
        ``master_nodes``, ``nodes`` or ``nfs_nodes`` list according to its
        name prefix; VMs with any other prefix are ignored.

        :param cluster_name: (str): Name of the cluster

        :return: (dict): Info of the cluster.

        :raises CseDuplicateClusterError: if more than one cluster matches.
        :raises ClusterNotFoundError: if no cluster matches.
        """
        self._connect_tenant()
        matches = load_from_metadata(
            self.tenant_client,
            name=cluster_name,
            org_name=self.req_spec.get(RequestKey.ORG_NAME),
            vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))
        if len(matches) > 1:
            raise CseDuplicateClusterError(f"Multiple clusters of name"
                                           f" '{cluster_name}' detected.")
        if len(matches) == 0:
            raise ClusterNotFoundError(f"Cluster '{cluster_name}' not found.")

        cluster = matches[0]
        cluster[K8S_PROVIDER_KEY] = K8sProviders.NATIVE
        vapp = VApp(self.tenant_client, href=cluster['vapp_href'])
        for vm in vapp.get_all_vms():
            vm_name = vm.get('name')
            node_info = {'name': vm_name, 'ipAddress': ''}
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm_name)
            except Exception:
                # The address stays '' when it cannot be determined.
                LOGGER.debug(f"Unable to get ip address of node "
                             f"{vm.get('name')}")

            if vm_name.startswith(NodeType.MASTER):
                bucket = 'master_nodes'
            elif vm_name.startswith(NodeType.WORKER):
                bucket = 'nodes'
            elif vm_name.startswith(NodeType.NFS):
                bucket = 'nfs_nodes'
            else:
                continue
            cluster.get(bucket).append(node_info)
        return cluster

    def get_node_info(self, cluster_name, node_name):
        """Get the info of a given node in the cluster.

        :param cluster_name: (str): Name of the cluster
        :param node_name: (str): Name of the node

        :return: (dict): Info of the node.

        :raises CseDuplicateClusterError: if more than one cluster matches.
        :raises ClusterNotFoundError: if no cluster matches.
        :raises NodeNotFoundError: if the vApp has no VM with that name.
        """
        self._connect_tenant()
        matches = load_from_metadata(
            self.tenant_client,
            name=cluster_name,
            org_name=self.req_spec.get(RequestKey.ORG_NAME),
            vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))
        if len(matches) > 1:
            raise CseDuplicateClusterError(f"Multiple clusters of name"
                                           f" '{cluster_name}' detected.")
        if len(matches) == 0:
            raise ClusterNotFoundError(f"Cluster '{cluster_name}' not found.")

        vapp = VApp(self.tenant_client, href=matches[0]['vapp_href'])
        node_info = None
        # The scan does not stop at the first hit: a later VM with the same
        # name replaces an earlier one.
        for vm in vapp.get_all_vms():
            vm_name = vm.get('name')
            if node_name != vm_name:
                continue

            node_info = {
                'name': vm_name,
                'numberOfCpus': '',
                'memoryMB': '',
                'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
                'ipAddress': ''
            }
            if hasattr(vm, 'VmSpecSection'):
                spec = vm.VmSpecSection
                node_info['numberOfCpus'] = spec.NumCpus.text
                node_info['memoryMB'] = spec.MemoryResourceMb.Configured.text
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm_name)
            except Exception:
                # The address stays '' when it cannot be determined.
                LOGGER.debug(f"Unable to get ip address of node "
                             f"{vm.get('name')}")

            if vm_name.startswith(NodeType.MASTER):
                node_info['node_type'] = 'master'
            elif vm_name.startswith(NodeType.WORKER):
                node_info['node_type'] = 'worker'
            elif vm_name.startswith(NodeType.NFS):
                node_info['node_type'] = 'nfs'
                node_info['exports'] = self._get_nfs_exports(
                    node_info['ipAddress'], vapp, vm)

        if node_info is None:
            raise NodeNotFoundError(f"Node '{node_name}' not found in "
                                    f"cluster '{cluster_name}'")
        return node_info

    def get_cluster_config(self, cluster_name):
        """Fetch the configuration of the named cluster.

        :param cluster_name: name used to look the cluster up in metadata.

        :return: the value returned by the module-level
            ``get_cluster_config`` helper.

        :raises CseDuplicateClusterError: if more than one cluster matches.
        :raises ClusterNotFoundError: if no cluster matches.
        """
        self._connect_tenant()
        matches = load_from_metadata(
            self.tenant_client,
            name=cluster_name,
            org_name=self.req_spec.get(RequestKey.ORG_NAME),
            vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))
        if len(matches) > 1:
            raise CseDuplicateClusterError(f"Multiple clusters of name"
                                           f" '{cluster_name}' detected.")
        if len(matches) == 0:
            raise ClusterNotFoundError(f"Cluster '{cluster_name}' not found.")

        cluster = matches[0]
        vapp = VApp(self.tenant_client, href=cluster['vapp_href'])
        template = self._get_template(name=cluster['template'])
        # Inside the method body this name resolves to the module-level
        # helper, not to this method.
        return get_cluster_config(get_server_runtime_config(), vapp,
                                  template['admin_password'])

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_cluster(self, cluster_name, vdc_name, node_count,
                       storage_profile, network_name, template, **kwargs):
        """Start creation of a new cluster.

        Validates the network and cluster name, assigns a fresh UUID as the
        cluster id, records a RUNNING task and calls ``self.start()`` to
        continue the work in the thread.

        :param cluster_name: name of the cluster to create.
        :param vdc_name: used here only in the debug log.
        :param node_count: used here only in the debug log.
        :param storage_profile: used here only in the debug log.
        :param network_name: org vDC network; must be truthy.
        :param template: not read by this method.
        :param kwargs: not read by this method.

        :return: dict with ``name``, ``cluster_id`` and ``task_href``.

        :rtype: dict

        :raises CseServerError: if no network is given or the cluster name
            is invalid.
        """
        # TODO(ClusterSpec) Create an inner class "ClusterSpec"
        #  in abstract_broker.py and have subclasses define and use it
        #  as instance variable.
        #  Method 'Create_cluster' in VcdBroker and PksBroker should take
        #  ClusterParams either as a param (or)
        #  read from instance variable (if needed only).

        if not network_name:
            raise CseServerError("Cluster cannot be created. "
                                 "Please provide a valid value for org "
                                 "vDC network param.")

        LOGGER.debug(f"About to create cluster {cluster_name} on {vdc_name} "
                     f"with {node_count} nodes, sp={storage_profile}")

        if not self._is_valid_name(cluster_name):
            raise CseServerError(f"Invalid cluster name '{cluster_name}'")

        self._connect_tenant()
        self._connect_sys_admin()
        self.cluster_name = cluster_name
        self.cluster_id = str(uuid.uuid4())
        self.op = OP_CREATE_CLUSTER
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating cluster {cluster_name}({self.cluster_id})")

        # run() dispatches on self.op once the thread is started.
        self.daemon = True
        self.start()

        return {
            'name': self.cluster_name,
            'cluster_id': self.cluster_id,
            'task_href': self.task_resource.get('href'),
        }

    @rollback_on_failure
    def create_cluster_thread(self):
        """Build the cluster end to end, reporting progress through the task.

        Steps, in order: verify that no cluster with this name exists, create
        the vApp on the requested network, tag the vApp with cluster metadata,
        add and initialize the master node, record the master IP as metadata,
        then optionally add and join worker nodes and add an NFS node.

        Exceptions of the cluster error types listed in the first handler
        mark the task as ERROR and are re-raised. Any other exception marks
        the task as ERROR and is not propagated. The sys admin connection is
        dropped in every case.

        NOTE(review): presumably the re-raise is what lets the
        `rollback_on_failure` decorator react to the failure -- confirm
        against the decorator's implementation.
        """
        network_name = self.req_spec.get(RequestKey.NETWORK_NAME)
        try:
            # Guard against creating a second cluster with the same name.
            clusters = load_from_metadata(self.tenant_client,
                                          name=self.cluster_name)
            if len(clusters) != 0:
                raise ClusterAlreadyExistsError(f"Cluster {self.cluster_name} "
                                                "already exists.")

            org_resource = self.tenant_client.get_org_by_name(
                self.req_spec.get(RequestKey.ORG_NAME))
            org = Org(self.tenant_client, resource=org_resource)
            vdc_resource = org.get_vdc(self.req_spec.get(RequestKey.OVDC_NAME))
            vdc = VDC(self.tenant_client, resource=vdc_resource)
            template = self._get_template()
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating cluster vApp {self.cluster_name}"
                        f"({self.cluster_id})")
            try:
                vapp_resource = vdc.create_vapp(
                    self.cluster_name,
                    description=f"cluster {self.cluster_name}",
                    network=network_name,
                    fence_mode='bridged')
            except Exception as e:
                # Wrap any failure so the outer handler re-raises it.
                raise ClusterOperationError(
                    "Error while creating vApp:", str(e))

            # Block until the vApp creation task has finished.
            self.tenant_client.get_task_monitor().wait_for_status(
                vapp_resource.Tasks.Task[0])
            # Metadata written onto the vApp: cluster id, CSE package version
            # and template name.
            tags = {}
            tags['cse.cluster.id'] = self.cluster_id
            tags['cse.version'] = pkg_resources.require(
                'container-service-extension')[0].version
            tags['cse.template'] = template['name']
            vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
            for k, v in tags.items():
                task = vapp.set_metadata('GENERAL', 'READWRITE', k, v)
                self.tenant_client.get_task_monitor().wait_for_status(task)
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating master node for {self.cluster_name}"
                        f"({self.cluster_id})")
            vapp.reload()

            server_config = get_server_runtime_config()
            try:
                add_nodes(1, template, NodeType.MASTER, server_config,
                          self.tenant_client, org, vdc, vapp, self.req_spec)
            except Exception as e:
                raise MasterNodeCreationError(
                    "Error while adding master node:", str(e))

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Initializing cluster {self.cluster_name}"
                        f"({self.cluster_id})")
            vapp.reload()
            init_cluster(server_config, vapp, template)
            # Store the master's IP on the vApp as metadata.
            master_ip = get_master_ip(server_config, vapp, template)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.tenant_client.get_task_monitor().wait_for_status(task)
            # Worker nodes are optional: created and joined only when a
            # positive count was requested.
            if self.req_spec.get(RequestKey.NUM_WORKERS) > 0:
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Creating "
                            f"{self.req_spec.get(RequestKey.NUM_WORKERS)} "
                            f"node(s) for "
                            f"{self.cluster_name}({self.cluster_id})")
                try:
                    add_nodes(self.req_spec.get(RequestKey.NUM_WORKERS),
                              template, NodeType.WORKER, server_config,
                              self.tenant_client, org, vdc, vapp,
                              self.req_spec)
                except Exception as e:
                    raise WorkerNodeCreationError(
                        "Error while creating worker node:", str(e))

                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Adding "
                            f"{self.req_spec.get(RequestKey.NUM_WORKERS)} "
                            f"node(s) to "
                            f"{self.cluster_name}({self.cluster_id})")
                vapp.reload()
                join_cluster(server_config, vapp, template)
            # The NFS node is optional as well.
            if self.req_spec.get(RequestKey.ENABLE_NFS):
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Creating NFS node for {self.cluster_name}"
                            f"({self.cluster_id})")
                try:
                    add_nodes(1, template, NodeType.NFS,
                              server_config, self.tenant_client, org, vdc,
                              vapp, self.req_spec)
                except Exception as e:
                    raise NFSNodeCreationError(
                        "Error while creating NFS node:", str(e))

            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Created cluster {self.cluster_name}"
                        f"({self.cluster_id})")
        except (MasterNodeCreationError, WorkerNodeCreationError,
                NFSNodeCreationError, ClusterJoiningError,
                ClusterInitializationError, ClusterOperationError) as e:
            # Known cluster failures: record them on the task, then propagate.
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = \
                ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY]
                [ERROR_DESCRIPTION_KEY],
                stack_trace=stack_trace)
            raise e
        except Exception as e:
            # Anything else is recorded on the task but NOT re-raised.
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = \
                ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
        finally:
            self._disconnect_sys_admin()

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_cluster(self, cluster_name):
        """Validate a cluster-delete request and launch the deletion.

        Looks the cluster up by name (scoped by the org and ovdc names found
        in the request spec), marks the task as RUNNING and starts this
        object via `self.start()` with the daemon flag set.

        :param str cluster_name: name of the cluster to delete.

        :return: dict with keys 'cluster_name' and 'task_href'.
        :rtype: dict

        :raises CseDuplicateClusterError: if more than one cluster matches.
        :raises ClusterNotFoundError: if no cluster matches.
        """
        LOGGER.debug(f"About to delete cluster with name: {cluster_name}")

        self.cluster_name = cluster_name
        self._connect_tenant()
        self._connect_sys_admin()
        self.op = OP_DELETE_CLUSTER

        matches = load_from_metadata(
            self.tenant_client,
            name=self.cluster_name,
            org_name=self.req_spec.get(RequestKey.ORG_NAME),
            vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))
        match_count = len(matches)
        if match_count > 1:
            raise CseDuplicateClusterError(
                f"Multiple clusters of name '{self.cluster_name}' detected.")
        if match_count != 1:
            raise ClusterNotFoundError(
                f"Cluster {self.cluster_name} not found.")

        self.cluster = matches[0]
        self.cluster_id = self.cluster['cluster_id']
        running_msg = (f"Deleting cluster {self.cluster_name}"
                       f"({self.cluster_id})")
        self._update_task(TaskStatus.RUNNING, message=running_msg)

        self.daemon = True
        self.start()
        return {
            'cluster_name': self.cluster_name,
            'task_href': self.task_resource.get('href'),
        }

    def delete_cluster_thread(self):
        """Force-delete the cluster's vApp and record the outcome on the task.

        Any exception is logged and reported as an ERROR task status; it is
        not propagated. The sys admin connection is dropped in every case.
        """
        LOGGER.debug(f"About to delete cluster with name: {self.cluster_name}")
        try:
            cluster_vdc = VDC(self.tenant_client,
                              href=self.cluster['vdc_href'])
            delete_task = cluster_vdc.delete_vapp(self.cluster['name'],
                                                  force=True)
            task_monitor = self.tenant_client.get_task_monitor()
            task_monitor.wait_for_status(delete_task)
            done_msg = (f"Deleted cluster {self.cluster_name}"
                        f"({self.cluster_id})")
            self._update_task(TaskStatus.SUCCESS, message=done_msg)
        except Exception as err:
            LOGGER.error(traceback.format_exc())
            self._update_task(TaskStatus.ERROR, error_message=str(err))
        finally:
            self._disconnect_sys_admin()

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_nodes(self):
        """Validate an add-nodes request and launch the node creation.

        Reads the cluster name, worker count, network name, org name and ovdc
        name from `self.req_spec`. After validation the task is marked as
        RUNNING and this object is started via `self.start()` with the daemon
        flag set.

        :return: dict with keys 'cluster_name' and 'task_href'.
        :rtype: dict

        :raises CseServerError: if the worker count is missing or below 1, or
            if the network name is missing.
        :raises CseDuplicateClusterError: if more than one cluster matches.
        :raises ClusterNotFoundError: if no cluster matches.
        """
        self.cluster_name = self.req_spec.get(RequestKey.CLUSTER_NAME)
        num_workers = self.req_spec.get(RequestKey.NUM_WORKERS)
        LOGGER.debug(f"About to add "
                     f"{num_workers} nodes to "
                     f"cluster {self.cluster_name} on VDC "
                     f"{self.req_spec.get(RequestKey.OVDC_NAME)}")
        # A missing count must be rejected explicitly: `None < 1` would raise
        # TypeError instead of the intended CseServerError.
        if num_workers is None or num_workers < 1:
            raise CseServerError(f"Invalid node count: {num_workers}.")
        if self.req_spec.get(RequestKey.NETWORK_NAME) is None:
            raise CseServerError('Network name is missing from the request.')

        self._connect_tenant()
        self._connect_sys_admin()
        clusters = load_from_metadata(
            self.tenant_client, name=self.cluster_name,
            org_name=self.req_spec.get(RequestKey.ORG_NAME),
            vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))

        if len(clusters) > 1:
            raise CseDuplicateClusterError(f"Multiple clusters of name "
                                           f"'{self.cluster_name}' detected.")
        if len(clusters) == 0:
            raise ClusterNotFoundError(
                f"Cluster '{self.cluster_name}' not found.")

        self.cluster = clusters[0]
        self.op = OP_CREATE_NODES
        self.cluster_id = self.cluster['cluster_id']
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Adding {num_workers} "
                    f"node(s) to cluster "
                    f"{self.cluster_name}({self.cluster_id})")
        self.daemon = True
        self.start()
        return {
            'cluster_name': self.cluster_name,
            'task_href': self.task_resource.get('href'),
        }

    @rollback_on_failure
    def create_nodes_thread(self):
        """Add the requested nodes to the cluster's vApp and report progress.

        The node type is NFS when the request spec enables NFS, otherwise
        WORKER. NFS nodes are only created; worker nodes are created and then
        joined to the cluster by VM name.

        A `NodeCreationError` marks the task as ERROR and is re-raised. Any
        other exception marks the task as ERROR and is not propagated. The
        sys admin connection is dropped in every case.

        NOTE(review): presumably the re-raise is what lets the
        `rollback_on_failure` decorator react to the failure -- confirm
        against the decorator's implementation.
        """
        LOGGER.debug(f"About to add nodes to cluster with name: "
                     f"{self.cluster_name}")
        try:
            server_config = get_server_runtime_config()
            org_resource = self.tenant_client.get_org()
            org = Org(self.tenant_client, resource=org_resource)
            vdc = VDC(self.tenant_client, href=self.cluster['vdc_href'])
            vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
            template = self._get_template()
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating {self.req_spec.get(RequestKey.NUM_WORKERS)}"
                        f" node(s) for {self.cluster_name}({self.cluster_id})")

            # ENABLE_NFS switches the whole request from worker to NFS nodes.
            node_type = NodeType.WORKER
            if self.req_spec.get(RequestKey.ENABLE_NFS):
                node_type = NodeType.NFS

            new_nodes = add_nodes(self.req_spec.get(RequestKey.NUM_WORKERS),
                                  template, node_type, server_config,
                                  self.tenant_client, org, vdc, vapp,
                                  self.req_spec)
            if node_type == NodeType.NFS:
                # NFS nodes are not joined to the cluster; creation is the
                # final step.
                self._update_task(
                    TaskStatus.SUCCESS,
                    message=f"Created "
                            f"{self.req_spec.get(RequestKey.NUM_WORKERS)} "
                            f"node(s) for "
                            f"{self.cluster_name}({self.cluster_id})")
            elif node_type == NodeType.WORKER:
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Adding "
                            f"{self.req_spec.get(RequestKey.NUM_WORKERS)} "
                            f"node(s) to cluster "
                            f"{self.cluster_name}({self.cluster_id})")
                # Only the VMs that were just created are joined.
                target_nodes = []
                for spec in new_nodes['specs']:
                    target_nodes.append(spec['target_vm_name'])
                vapp.reload()
                join_cluster(server_config, vapp, template, target_nodes)
                self._update_task(
                    TaskStatus.SUCCESS,
                    message=f"Added "
                            f"{self.req_spec.get(RequestKey.NUM_WORKERS)} "
                            f"node(s) to cluster "
                            f"{self.cluster_name}({self.cluster_id})")
        except NodeCreationError as e:
            # Node creation failures are recorded on the task, then propagated.
            error_obj = error_to_json(e)
            LOGGER.error(traceback.format_exc())
            stack_trace = \
                ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
            raise
        except Exception as e:
            # Anything else is recorded on the task but NOT re-raised.
            error_obj = error_to_json(e)
            LOGGER.error(traceback.format_exc())
            stack_trace = \
                ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
        finally:
            self._disconnect_sys_admin()

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_nodes(self):
        """Validate a delete-nodes request and launch the node deletion.

        Reads the cluster name, node name list, org name and ovdc name from
        `self.req_spec`. After validation the task is marked as RUNNING and
        this object is started via `self.start()` with the daemon flag set.

        :return: dict with keys 'cluster_name' and 'task_href'.
        :rtype: dict

        :raises CseServerError: if the node list is missing or empty, if it
            names a master node, or if the cluster is not found.
        :raises CseDuplicateClusterError: if more than one cluster matches.
        """
        self.cluster_name = self.req_spec.get(RequestKey.CLUSTER_NAME)
        LOGGER.debug(f"About to delete nodes from cluster with name: "
                     f"{self.cluster_name}")

        node_names = self.req_spec.get(RequestKey.NODE_NAMES_LIST)
        # A missing list must be rejected explicitly: len(None) would raise
        # TypeError instead of the intended CseServerError.
        if not node_names:
            raise CseServerError(f"Invalid list of nodes: {node_names}.")
        for node in node_names:
            if node.startswith(NodeType.MASTER):
                raise CseServerError(f"Can't delete a master node: '{node}'.")
        self._connect_tenant()
        self._connect_sys_admin()
        clusters = load_from_metadata(
            self.tenant_client, name=self.cluster_name,
            org_name=self.req_spec.get(RequestKey.ORG_NAME),
            vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))
        if len(clusters) == 0:
            raise CseServerError(f"Cluster '{self.cluster_name}' not found.")

        if len(clusters) > 1:
            raise CseDuplicateClusterError(f"Multiple clusters of name "
                                           f"'{self.cluster_name}' detected.")
        self.cluster = clusters[0]
        self.op = OP_DELETE_NODES
        self.cluster_id = self.cluster['cluster_id']
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Deleting "
                    f"{len(node_names)} "
                    f"node(s) from cluster "
                    f"{self.cluster_name}({self.cluster_id})")
        self.daemon = True
        self.start()
        return {
            'cluster_name': self.cluster_name,
            'task_href': self.task_resource.get('href')
        }

    def delete_nodes_thread(self):
        """Remove the requested nodes from the cluster and delete their VMs.

        Runs three stages, updating the task before each: hand the node names
        to `delete_nodes_from_cluster`, undeploy each VM, then delete the VMs
        from the vApp. The first two stages are best effort: their failures
        are logged (with traceback) and the work continues.

        Any other exception is logged and reported as an ERROR task status;
        it is not propagated. The sys admin connection is dropped in every
        case.
        """
        LOGGER.debug(f"About to delete nodes from cluster with name: "
                     f"{self.cluster_name}")
        try:
            vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
            template = self._get_template()
            node_names = self.req_spec.get(RequestKey.NODE_NAMES_LIST)
            cluster_label = f"{self.cluster_name}({self.cluster_id})"
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Deleting {len(node_names)} node(s) from "
                        f"{cluster_label}")
            try:
                server_config = get_server_runtime_config()
                delete_nodes_from_cluster(
                    server_config,
                    vapp,
                    template,
                    node_names,
                    self.req_spec.get(RequestKey.FORCE_DELETE))
            except Exception:
                # Best effort: the VMs are still removed below, but keep the
                # cause in the log instead of discarding it.
                LOGGER.error(f"Couldn't delete node "
                             f"{node_names}"
                             f" from cluster:{self.cluster_name}")
                LOGGER.error(traceback.format_exc())
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Undeploying {len(node_names)} node(s) for "
                        f"{cluster_label}")
            for vm_name in node_names:
                vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name))
                try:
                    task = vm.undeploy()
                    self.tenant_client.get_task_monitor().wait_for_status(task)
                except Exception:
                    # Best effort: a VM that fails to undeploy is still
                    # passed to delete_vms below.
                    LOGGER.warning(f"Couldn't undeploy VM {vm_name}")
                    LOGGER.warning(traceback.format_exc())
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Deleting {len(node_names)} VM(s) for "
                        f"{cluster_label}")
            task = vapp.delete_vms(node_names)
            self.tenant_client.get_task_monitor().wait_for_status(task)
            # Message fixed: nodes are deleted "from" the cluster, not "to".
            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Deleted {len(node_names)} node(s) from cluster "
                        f"{cluster_label}")
        except Exception as e:
            LOGGER.error(traceback.format_exc())
            error_obj = error_to_json(e)
            stack_trace = \
                ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
                stack_trace=stack_trace)
        finally:
            self._disconnect_sys_admin()

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def resize_cluster(self, cluster_name, node_count, curr_cluster_info=None):
        """Grow the named cluster to the given number of worker nodes.

        The number of nodes to add is written into the request spec and the
        actual work is delegated to create_nodes().

        :param str cluster_name: Name of the cluster
        :param int node_count: Desired number of worker nodes
        (must be greater than the current number).
        :param dict curr_cluster_info: Current properties of the cluster;
        fetched through get_cluster_info() when not supplied.

        :return response: response returned by create_nodes()
        :rtype: dict

        :raises CseServerError: if the cluster already has `node_count` or
        more worker nodes.
        """
        cluster_info = curr_cluster_info
        if not cluster_info:
            cluster_info = self.get_cluster_info(cluster_name=cluster_name)
        existing_workers = len(cluster_info['nodes'])

        if existing_workers > node_count:
            raise CseServerError("Automatic scale down is not supported for "
                                 "vCD powered Kubernetes clusters. Use "
                                 "'vcd cse delete node' command.")
        if existing_workers == node_count:
            raise CseServerError(f"Cluster - {cluster_name} is already at the "
                                 f"size of {existing_workers}.")

        # create_nodes() reads the number of nodes to add from the spec.
        self.req_spec[RequestKey.NUM_WORKERS] = node_count - existing_workers
        return self.create_nodes()
コード例 #20
0
class VcdBroker(AbstractBroker):
    """Handles cluster operations for 'native' k8s provider."""

    def __init__(self, tenant_auth_token):
        """Set up tenant state and the lazily created sys admin client slot.

        :param tenant_auth_token: token forwarded unchanged to the parent
            class initializer.
        """
        # Declared before the parent initializer runs so the attributes
        # exist on the instance regardless of what the parent does.
        self.tenant_client = None
        self.client_session = None
        self.tenant_user_name = None
        self.tenant_user_id = None
        self.tenant_org_name = None
        self.tenant_org_href = None
        # populates above attributes
        super().__init__(tenant_auth_token)

        self._sys_admin_client = None  # private: use sys_admin_client property
        # Both start unset.
        self.task = None
        self.task_resource = None

    @property
    def sys_admin_client(self):
        """Return the cached sys admin client, creating it on first access.

        The client is obtained from vcd_utils.get_sys_admin_client() and kept
        on the instance until logout_sys_admin_client() clears it.
        """
        client = self._sys_admin_client
        if client is None:
            client = vcd_utils.get_sys_admin_client()
            self._sys_admin_client = client
        return client

    def logout_sys_admin_client(self):
        """Log out the cached sys admin client, if any, and forget it."""
        client = self._sys_admin_client
        if client is not None:
            client.logout()
        # Cleared after a successful logout, or when there was no client.
        self._sys_admin_client = None

    def get_cluster_info(self, data):
        """Return cluster metadata together with per-node name/IP data.

        Common broker function for the 'cluster info' operation. Every VM of
        the cluster's vApp is listed under 'master_nodes', 'nodes' or
        'nfs_nodes' according to its name prefix; VMs matching none of the
        prefixes are left out. A node whose primary IP cannot be read is
        reported with an empty 'ipAddress'.

        Required data: cluster_name
        Optional data and default values: org_name=None, ovdc_name=None
        """
        utils.ensure_keys_in_dict([RequestKey.CLUSTER_NAME], data,
                                  dict_name='data')
        validated_data = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None,
            **data
        }
        cluster = get_cluster(
            self.tenant_client,
            validated_data[RequestKey.CLUSTER_NAME],
            org_name=validated_data[RequestKey.ORG_NAME],
            ovdc_name=validated_data[RequestKey.OVDC_NAME])

        cluster[K8S_PROVIDER_KEY] = K8sProvider.NATIVE
        vapp = VApp(self.tenant_client, href=cluster['vapp_href'])

        # (name prefix, cluster key) pairs; checked in order, first match wins
        node_buckets = (
            (NodeType.MASTER, 'master_nodes'),
            (NodeType.WORKER, 'nodes'),
            (NodeType.NFS, 'nfs_nodes'),
        )
        for vm in vapp.get_all_vms():
            vm_name = vm.get('name')
            node_info = {'name': vm_name, 'ipAddress': ''}
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(vm_name)
            except Exception:
                LOGGER.debug(f"Unable to get ip address of node "
                             f"{vm_name}")
            for prefix, bucket in node_buckets:
                if vm_name.startswith(prefix):
                    cluster.get(bucket).append(node_info)
                    break

        return cluster

    def list_clusters(self, data):
        """Return summary records for all native clusters.

        Common broker function for the 'list clusters' operation. Each record
        is a dict built from the raw cluster data, plus the org name looked
        up from the cluster's vdc id and the native provider marker.

        Optional data and default values: org_name=None, ovdc_name=None
        """
        validated_data = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None,
            **data
        }

        raw_clusters = get_all_clusters(
            self.tenant_client,
            org_name=validated_data[RequestKey.ORG_NAME],
            ovdc_name=validated_data[RequestKey.OVDC_NAME])

        return [
            {
                'name': raw['name'],
                'IP master': raw['leader_endpoint'],
                'template_name': raw.get('template_name'),
                'template_revision': raw.get('template_revision'),
                'k8s_version': raw.get('k8s_version'),
                'VMs': raw['number_of_vms'],
                'vdc': raw['vdc_name'],
                'status': raw['status'],
                'vdc_id': raw['vdc_id'],
                'org_name': vcd_utils.get_org_name_from_ovdc_id(raw['vdc_id']),
                K8S_PROVIDER_KEY: K8sProvider.NATIVE
            }
            for raw in raw_clusters
        ]

    def get_cluster_config(self, data):
        """Return the cluster's kube config file contents as a string.

        Common broker function for the 'cluster config' operation. The file
        '/root/.kube/config' is downloaded from every master node of the
        cluster's vApp, but only the first download is returned. The sys
        admin client is logged out once the downloads finish or fail.

        Required data: cluster_name
        Optional data and default values: org_name=None, ovdc_name=None

        Raises ClusterOperationError when nothing was downloaded or the first
        download did not return an OK status.
        """
        utils.ensure_keys_in_dict([RequestKey.CLUSTER_NAME], data,
                                  dict_name='data')
        validated_data = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None,
            **data
        }

        cluster = get_cluster(
            self.tenant_client,
            validated_data[RequestKey.CLUSTER_NAME],
            org_name=validated_data[RequestKey.ORG_NAME],
            ovdc_name=validated_data[RequestKey.OVDC_NAME])
        vapp = VApp(self.tenant_client, href=cluster['vapp_href'])
        master_names = get_node_names(vapp, NodeType.MASTER)

        kube_config_path = '/root/.kube/config'
        downloads = []
        try:
            for master_name in master_names:
                LOGGER.debug(f"getting file from node {master_name}")
                password = vapp.get_admin_password(master_name)
                vs = vs_utils.get_vsphere(self.sys_admin_client, vapp,
                                          vm_name=master_name, logger=LOGGER)
                vs.connect()
                vm = vs.get_vm_by_moid(vapp.get_vm_moid(master_name))
                downloads.append(
                    vs.download_file_from_guest(vm, 'root', password,
                                                kube_config_path))
        finally:
            self.logout_sys_admin_client()

        if not downloads or downloads[0].status_code != requests.codes.ok:
            raise ClusterOperationError("Couldn't get cluster configuration")
        return downloads[0].content.decode()

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_cluster(self, data):
        """Start the cluster creation operation.

        Common broker function that validates data for the 'create cluster'
        operation and returns a dictionary with cluster detail and task
        information. Calls the asynchronous cluster create function that
        actually performs the work. The returned `result['task_href']` can
        be polled to get updates on task progress.

        Required data: cluster_name, org_name, ovdc_name, network_name
        Optional data and default values: num_nodes=2, num_cpu=None,
            mb_memory=None, storage_profile_name=None, ssh_key_filepath=None,
            template_name=default, template_revision=default, enable_nfs=False,
            rollback=True

        Raises CseServerError for an invalid cluster name or a worker count
        below 1, and ClusterAlreadyExistsError when a cluster of that name is
        found.
        """
        required = [
            RequestKey.CLUSTER_NAME,
            RequestKey.ORG_NAME,
            RequestKey.OVDC_NAME,
            RequestKey.NETWORK_NAME
        ]
        utils.ensure_keys_in_dict(required, data, dict_name='data')
        cluster_name = data[RequestKey.CLUSTER_NAME]
        # check that cluster name is syntactically valid
        if not is_valid_cluster_name(cluster_name):
            raise CseServerError(f"Invalid cluster name '{cluster_name}'")
        # check that cluster name doesn't already exist
        # (a successful lookup is the failure case; ClusterNotFoundError from
        # get_cluster is the expected outcome)
        try:
            get_cluster(self.tenant_client, cluster_name,
                        org_name=data[RequestKey.ORG_NAME],
                        ovdc_name=data[RequestKey.OVDC_NAME])
            raise ClusterAlreadyExistsError(f"Cluster {cluster_name} "
                                            f"already exists.")
        except ClusterNotFoundError:
            pass
        # check that requested/default template is valid
        template = get_template(
            name=data.get(RequestKey.TEMPLATE_NAME),
            revision=data.get(RequestKey.TEMPLATE_REVISION))
        defaults = {
            RequestKey.NUM_WORKERS: 2,
            RequestKey.NUM_CPU: None,
            RequestKey.MB_MEMORY: None,
            RequestKey.STORAGE_PROFILE_NAME: None,
            RequestKey.SSH_KEY_FILEPATH: None,
            RequestKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME],
            RequestKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION],
            RequestKey.ENABLE_NFS: False,
            RequestKey.ROLLBACK: True,
        }
        # Values present in `data` win over the defaults, including values
        # that are explicitly None.
        validated_data = {**defaults, **data}

        # TODO HACK default dictionary combining needs to be fixed
        # A falsy template name/revision coming from `data` would otherwise
        # override the resolved template, so both are re-defaulted here.
        validated_data[RequestKey.TEMPLATE_NAME] = validated_data[RequestKey.TEMPLATE_NAME] or template[LocalTemplateKey.NAME] # noqa: E501
        validated_data[RequestKey.TEMPLATE_REVISION] = validated_data[RequestKey.TEMPLATE_REVISION] or template[LocalTemplateKey.REVISION] # noqa: E501

        template_name = validated_data[RequestKey.TEMPLATE_NAME]
        template_revision = validated_data[RequestKey.TEMPLATE_REVISION]

        # check that requested number of worker nodes is at least 1
        num_workers = validated_data[RequestKey.NUM_WORKERS]
        if num_workers < 1:
            raise CseServerError(f"Worker node count must be > 0 "
                                 f"(received {num_workers}).")

        cluster_id = str(uuid.uuid4())
        # must _update_task or else self.task_resource is None
        # do not logout of sys admin, or else in pyvcloud's session.request()
        # call, session becomes None
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating cluster vApp '{cluster_name}' ({cluster_id})"
                    f" from template '{template_name}' "
                    f"(revision {template_revision})")
        self._create_cluster_async(
            org_name=validated_data[RequestKey.ORG_NAME],
            ovdc_name=validated_data[RequestKey.OVDC_NAME],
            cluster_name=cluster_name,
            cluster_id=cluster_id,
            template_name=template_name,
            template_revision=template_revision,
            num_workers=validated_data[RequestKey.NUM_WORKERS],
            network_name=validated_data[RequestKey.NETWORK_NAME],
            num_cpu=validated_data[RequestKey.NUM_CPU],
            mb_memory=validated_data[RequestKey.MB_MEMORY],
            storage_profile_name=validated_data[RequestKey.STORAGE_PROFILE_NAME], # noqa: E501
            ssh_key_filepath=validated_data[RequestKey.SSH_KEY_FILEPATH],
            enable_nfs=validated_data[RequestKey.ENABLE_NFS],
            rollback=validated_data[RequestKey.ROLLBACK])

        return {
            'name': cluster_name,
            'cluster_id': cluster_id,
            'task_href': self.task_resource.get('href')
        }

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def resize_cluster(self, data):
        """Start the resize cluster operation.

        Common broker function that validates data for the 'resize cluster'
        operation. Native clusters can only grow: the difference between the
        requested and the current worker count is passed on to
        create_nodes(), whose result (including `task_href`, which can be
        polled for progress) is returned.

        Required data: cluster_name, network, num_nodes
        Optional data and default values: org_name=None, ovdc_name=None,
            rollback=True, template_name=None, template_revision=None

        Raises CseServerError when the requested count is below 1, below the
        current count, or equal to the current count.
        """
        # TODO default template for resizing should be master's template
        utils.ensure_keys_in_dict(
            [
                RequestKey.CLUSTER_NAME,
                RequestKey.NUM_WORKERS,
                RequestKey.NETWORK_NAME
            ],
            data,
            dict_name='data')
        validated_data = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None,
            RequestKey.ROLLBACK: True,
            RequestKey.TEMPLATE_NAME: None,
            RequestKey.TEMPLATE_REVISION: None,
            **data
        }
        cluster_name = validated_data[RequestKey.CLUSTER_NAME]
        desired_count = validated_data[RequestKey.NUM_WORKERS]
        if desired_count < 1:
            raise CseServerError(f"Worker node count must be > 0 "
                                 f"(received {desired_count}).")

        # cluster_handler.py already makes a cluster info API call to vCD, but
        # that call does not return any node info, so this additional
        # cluster info call must be made
        current_count = len(self.get_cluster_info(validated_data)['nodes'])
        if current_count > desired_count:
            raise CseServerError("Automatic scale down is not supported for "
                                 "vCD powered Kubernetes clusters. Use "
                                 "'vcd cse delete node' command.")
        if current_count == desired_count:
            raise CseServerError(f"Cluster '{cluster_name}' already has "
                                 f"{current_count} worker nodes.")

        # create_nodes() receives the number of nodes to add, not the total.
        validated_data[RequestKey.NUM_WORKERS] = desired_count - current_count
        return self.create_nodes(validated_data)

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_cluster(self, data):
        """Start the delete cluster operation.

        Common broker function that validates data for the 'delete cluster'
        operation. Deleting the cluster is an asynchronous task, so the
        returned `result['task_href']` can be polled to get updates on task
        progress.

        Required data: cluster_name
        Optional data and default values: org_name=None, ovdc_name=None
        """
        utils.ensure_keys_in_dict([RequestKey.CLUSTER_NAME], data,
                                  dict_name='data')
        validated_data = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None,
            **data
        }
        cluster_name = validated_data[RequestKey.CLUSTER_NAME]

        target = get_cluster(
            self.tenant_client,
            cluster_name,
            org_name=validated_data[RequestKey.ORG_NAME],
            ovdc_name=validated_data[RequestKey.OVDC_NAME])
        cluster_id = target['cluster_id']
        # must _update_task here or else self.task_resource is None
        # do not logout of sys admin, or else in pyvcloud's session.request()
        # call, session becomes None
        running_msg = f"Deleting cluster {cluster_name} ({cluster_id})"
        self._update_task(TaskStatus.RUNNING, message=running_msg)
        self._delete_cluster_async(cluster_name=cluster_name,
                                   cluster_vdc_href=target['vdc_href'])

        return {
            'cluster_name': cluster_name,
            'task_href': self.task_resource.get('href')
        }

    def get_node_info(self, data):
        """Describe one node of a cluster as a dictionary.

        Required data: cluster_name, node_name
        Optional data and default values: org_name=None, ovdc_name=None

        :param dict data: request payload keyed by RequestKey members.

        :return: dict with keys 'name', 'numberOfCpus', 'memoryMB', 'status'
            and 'ipAddress'. 'node_type' is added when the VM name starts
            with a known node type prefix, and 'exports' is added for NFS
            nodes.

        :raises NodeNotFoundError: if no VM of the cluster vApp is named
            node_name.
        """
        utils.ensure_keys_in_dict(
            [RequestKey.CLUSTER_NAME, RequestKey.NODE_NAME],
            data, dict_name='data')
        # values supplied by the caller override the None defaults
        request = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None,
            **data
        }
        cluster_name = request[RequestKey.CLUSTER_NAME]
        node_name = request[RequestKey.NODE_NAME]

        cluster = get_cluster(
            self.tenant_client,
            cluster_name,
            org_name=request[RequestKey.ORG_NAME],
            ovdc_name=request[RequestKey.OVDC_NAME])

        vapp = VApp(self.tenant_client, href=cluster['vapp_href'])
        vms = vapp.get_all_vms()

        # VM name prefix -> reported node type; first matching prefix wins
        node_kinds = (
            (NodeType.MASTER, 'master'),
            (NodeType.WORKER, 'worker'),
            (NodeType.NFS, 'nfs'),
        )

        details = None
        # the whole list is scanned, so the last VM with a matching name wins
        for vm in vms:
            vm_name = vm.get('name')
            if node_name == vm_name:
                vm_status = VCLOUD_STATUS_MAP.get(int(vm.get('status')))
                details = {
                    'name': vm_name,
                    'numberOfCpus': '',
                    'memoryMB': '',
                    'status': vm_status,
                    'ipAddress': ''
                }
                if hasattr(vm, 'VmSpecSection'):
                    spec = vm.VmSpecSection
                    details['numberOfCpus'] = spec.NumCpus.text
                    details['memoryMB'] = spec.MemoryResourceMb.Configured.text # noqa: E501
                # a missing IP is not fatal; 'ipAddress' stays an empty string
                try:
                    details['ipAddress'] = vapp.get_primary_ip(vm_name)
                except Exception:
                    LOGGER.debug(f"Unable to get ip address of node {vm_name}")
                for prefix, kind in node_kinds:
                    if vm_name.startswith(prefix):
                        details['node_type'] = kind
                        if kind == 'nfs':
                            details['exports'] = self._get_nfs_exports(details['ipAddress'], vapp, vm_name) # noqa: E501
                        break

        if details is not None:
            return details
        raise NodeNotFoundError(f"Node '{node_name}' not found in "
                                f"cluster '{cluster_name}'")

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def create_nodes(self, data):
        """Start the create nodes operation.

        Validates data for the 'node create' operation, creates the tracking
        task and hands the work to `_create_nodes_async`. Creating nodes is
        an asynchronous task, so the returned `result['task_href']` can be
        polled to get updates on task progress.

        Required data: cluster_name, network_name
        Optional data and default values: org_name=None, ovdc_name=None,
            number of workers (RequestKey.NUM_WORKERS)=1, num_cpu=None,
            mb_memory=None, storage_profile_name=None, ssh_key_filepath=None,
            template_name=default, template_revision=default, enable_nfs=False,
            rollback=True

        :param dict data: request payload keyed by RequestKey members.

        :return: dict with keys 'cluster_name' and 'task_href'.

        :raises CseServerError: if the requested worker count is below 1.
        """
        utils.ensure_keys_in_dict(
            [RequestKey.CLUSTER_NAME, RequestKey.NETWORK_NAME],
            data, dict_name='data')
        cluster_name = data[RequestKey.CLUSTER_NAME]

        # check that requested/default template is valid
        template = get_template(
            name=data.get(RequestKey.TEMPLATE_NAME),
            revision=data.get(RequestKey.TEMPLATE_REVISION))
        fallback_name = template[LocalTemplateKey.NAME]
        fallback_revision = template[LocalTemplateKey.REVISION]

        # values supplied by the caller override the defaults listed first
        request = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None,
            RequestKey.NUM_WORKERS: 1,
            RequestKey.NUM_CPU: None,
            RequestKey.MB_MEMORY: None,
            RequestKey.STORAGE_PROFILE_NAME: None,
            RequestKey.SSH_KEY_FILEPATH: None,
            RequestKey.TEMPLATE_NAME: fallback_name,
            RequestKey.TEMPLATE_REVISION: fallback_revision,
            RequestKey.ENABLE_NFS: False,
            RequestKey.ROLLBACK: True,
            **data
        }

        # TODO HACK default dictionary combining needs to be fixed
        # a falsy template name/revision in the request would otherwise
        # override the resolved template, so fall back to it explicitly
        template_name = request[RequestKey.TEMPLATE_NAME] or fallback_name
        template_revision = request[RequestKey.TEMPLATE_REVISION] or fallback_revision # noqa: E501

        num_workers = request[RequestKey.NUM_WORKERS]
        if num_workers < 1:
            raise CseServerError(f"Worker node count must be > 0 "
                                 f"(received {num_workers}).")

        cluster = get_cluster(
            self.tenant_client,
            cluster_name,
            org_name=request[RequestKey.ORG_NAME],
            ovdc_name=request[RequestKey.OVDC_NAME])
        cluster_id = cluster['cluster_id']

        # must _update_task here or else self.task_resource is None
        # do not logout of sys admin, or else in pyvcloud's session.request()
        # call, session becomes None
        progress_msg = f"Creating {num_workers} node(s) from template " \
                       f"'{template_name}' (revision {template_revision}) " \
                       f"and adding to {cluster_name} ({cluster_id})"
        self._update_task(TaskStatus.RUNNING, message=progress_msg)

        async_kwargs = {
            'cluster_name': cluster_name,
            'cluster_vdc_href': cluster['vdc_href'],
            'cluster_vapp_href': cluster['vapp_href'],
            'cluster_id': cluster_id,
            'template_name': template_name,
            'template_revision': template_revision,
            'num_workers': num_workers,
            'network_name': request[RequestKey.NETWORK_NAME],
            'num_cpu': request[RequestKey.NUM_CPU],
            'mb_memory': request[RequestKey.MB_MEMORY],
            'storage_profile_name': request[RequestKey.STORAGE_PROFILE_NAME],
            'ssh_key_filepath': request[RequestKey.SSH_KEY_FILEPATH],
            'enable_nfs': request[RequestKey.ENABLE_NFS],
            'rollback': request[RequestKey.ROLLBACK],
        }
        self._create_nodes_async(**async_kwargs)

        response = {'cluster_name': cluster_name}
        response['task_href'] = self.task_resource.get('href')
        return response

    @secure(required_rights=[CSE_NATIVE_DEPLOY_RIGHT_NAME])
    def delete_nodes(self, data):
        """Start the delete nodes operation.

        Validates data for the 'delete nodes' operation, creates the tracking
        task and hands the work to `_delete_nodes_async`. Deleting nodes is
        an asynchronous task, so the returned `result['task_href']` can be
        polled to get updates on task progress.

        Required data: cluster_name, node_names_list
        Optional data and default values: org_name=None, ovdc_name=None

        :param dict data: request payload keyed by RequestKey members.

        :return: dict with keys 'cluster_name' and 'task_href', or
            `{'body': {}}` when the list of node names is empty (no task is
            created in that case).

        :raises CseServerError: if any requested node name starts with the
            master node prefix.
        """
        utils.ensure_keys_in_dict(
            [RequestKey.CLUSTER_NAME, RequestKey.NODE_NAMES_LIST],
            data, dict_name='data')
        # values supplied by the caller override the None defaults
        request = {
            RequestKey.ORG_NAME: None,
            RequestKey.OVDC_NAME: None,
            **data
        }
        cluster_name = request[RequestKey.CLUSTER_NAME]
        node_names_list = request[RequestKey.NODE_NAMES_LIST]

        # nothing to do when no nodes were requested
        node_count = len(node_names_list)
        if node_count == 0:
            LOGGER.debug("No nodes specified to delete")
            return {'body': {}}

        # refuse the request as soon as the first master node is found
        master = next(
            (name for name in node_names_list
             if name.startswith(NodeType.MASTER)),
            None)
        if master is not None:
            raise CseServerError(f"Can't delete a master node: '{master}'.")

        cluster = get_cluster(
            self.tenant_client,
            cluster_name,
            org_name=request[RequestKey.ORG_NAME],
            ovdc_name=request[RequestKey.OVDC_NAME])
        cluster_id = cluster['cluster_id']

        # must _update_task here or else self.task_resource is None
        # do not logout of sys admin, or else in pyvcloud's session.request()
        # call, session becomes None
        progress_msg = f"Deleting {node_count} node(s)" \
                       f" from cluster {cluster_name}({cluster_id})"
        self._update_task(TaskStatus.RUNNING, message=progress_msg)
        self._delete_nodes_async(
            node_names_list=node_names_list,
            cluster_vapp_href=cluster['vapp_href'],
            cluster_name=cluster_name)

        response = {'cluster_name': cluster_name}
        response['task_href'] = self.task_resource.get('href')
        return response

    # all parameters following '*args' are required and keyword-only
    @run_async
    def _create_cluster_async(self, *args,
                              org_name, ovdc_name, cluster_name, cluster_id,
                              template_name, template_revision, num_workers,
                              network_name, num_cpu, mb_memory,
                              storage_profile_name, ssh_key_filepath,
                              enable_nfs, rollback):
        """Create a cluster vApp and populate it with nodes.

        Progress is reported through `_update_task` at every step: the vApp
        is created and tagged with cluster metadata, the master node is added
        and the cluster initialized, the master IP is stored in the vApp
        metadata, the worker nodes are added and joined to the cluster, and
        an NFS node is added when `enable_nfs` is true.

        Any failure, including a failed org/vdc lookup, marks the task as
        ERROR. For the known cluster creation errors the cluster is deleted
        first when `rollback` is true. The sys admin client is always logged
        out at the end.

        :param str org_name: org used to look up the vdc.
        :param str ovdc_name: vdc in which the cluster vApp is created.
        :param str cluster_name: name of the cluster vApp.
        :param str cluster_id: id stored in the vApp metadata.
        :param str template_name: name of the template to resolve.
        :param str template_revision: revision of the template to resolve.
        :param int num_workers: number of worker nodes to add.
        :param str network_name: network passed to vApp and node creation.
        :param num_cpu: passed through to `add_nodes`.
        :param mb_memory: passed through to `add_nodes` as `memory_in_mb`.
        :param storage_profile_name: passed through to `add_nodes`.
        :param ssh_key_filepath: passed through to `add_nodes`.
        :param bool enable_nfs: whether an NFS node is added as well.
        :param bool rollback: whether the cluster is deleted after a known
            cluster creation error.
        """
        LOGGER.debug(f"About to create cluster {cluster_name} on {ovdc_name}"
                     f" with {num_workers} worker nodes, "
                     f"storage profile={storage_profile_name}")
        try:
            # The lookups live inside the try block so that a failure here
            # still marks the task as ERROR and logs out the sys admin client
            org = vcd_utils.get_org(self.tenant_client, org_name=org_name)
            vdc = vcd_utils.get_vdc(
                self.tenant_client, vdc_name=ovdc_name, org=org)

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating cluster vApp {cluster_name}({cluster_id})")
            try:
                vapp_resource = \
                    vdc.create_vapp(cluster_name,
                                    description=f"cluster {cluster_name}",
                                    network=network_name,
                                    fence_mode='bridged')
            except Exception as e:
                msg = f"Error while creating vApp: {e}"
                LOGGER.debug(str(e))
                raise ClusterOperationError(msg)
            self.tenant_client.get_task_monitor().wait_for_status(vapp_resource.Tasks.Task[0]) # noqa: E501

            template = get_template(template_name, template_revision)

            # metadata written on the cluster vApp
            tags = {
                ClusterMetadataKey.CLUSTER_ID: cluster_id,
                ClusterMetadataKey.CSE_VERSION: pkg_resources.require('container-service-extension')[0].version, # noqa: E501
                ClusterMetadataKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME], # noqa: E501
                ClusterMetadataKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION] # noqa: E501
            }
            vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
            task = vapp.set_multiple_metadata(tags)
            self.tenant_client.get_task_monitor().wait_for_status(task)

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating master node for "
                        f"{cluster_name} ({cluster_id})")
            vapp.reload()
            server_config = utils.get_server_runtime_config()
            catalog_name = server_config['broker']['catalog']
            try:
                add_nodes(client=self.tenant_client,
                          num_nodes=1,
                          node_type=NodeType.MASTER,
                          org=org,
                          vdc=vdc,
                          vapp=vapp,
                          catalog_name=catalog_name,
                          template=template,
                          network_name=network_name,
                          num_cpu=num_cpu,
                          memory_in_mb=mb_memory,
                          storage_profile=storage_profile_name,
                          ssh_key_filepath=ssh_key_filepath)
            except Exception as e:
                raise MasterNodeCreationError("Error adding master node:",
                                              str(e))

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Initializing cluster {cluster_name} ({cluster_id})")
            vapp.reload()
            init_cluster(vapp, template[LocalTemplateKey.NAME],
                         template[LocalTemplateKey.REVISION])
            # store the master IP in the vApp metadata
            master_ip = get_master_ip(vapp)
            task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                     master_ip)
            self.tenant_client.get_task_monitor().wait_for_status(task)

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating {num_workers} node(s) for "
                        f"{cluster_name}({cluster_id})")
            try:
                add_nodes(client=self.tenant_client,
                          num_nodes=num_workers,
                          node_type=NodeType.WORKER,
                          org=org,
                          vdc=vdc,
                          vapp=vapp,
                          catalog_name=catalog_name,
                          template=template,
                          network_name=network_name,
                          num_cpu=num_cpu,
                          memory_in_mb=mb_memory,
                          storage_profile=storage_profile_name,
                          ssh_key_filepath=ssh_key_filepath)
            except Exception as e:
                raise WorkerNodeCreationError("Error creating worker node:",
                                              str(e))

            self._update_task(
                TaskStatus.RUNNING,
                message=f"Adding {num_workers} node(s) to "
                        f"{cluster_name}({cluster_id})")
            vapp.reload()
            join_cluster(vapp, template[LocalTemplateKey.NAME],
                         template[LocalTemplateKey.REVISION])

            if enable_nfs:
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Creating NFS node for "
                            f"{cluster_name} ({cluster_id})")
                try:
                    add_nodes(client=self.tenant_client,
                              num_nodes=1,
                              node_type=NodeType.NFS,
                              org=org,
                              vdc=vdc,
                              vapp=vapp,
                              catalog_name=catalog_name,
                              template=template,
                              network_name=network_name,
                              num_cpu=num_cpu,
                              memory_in_mb=mb_memory,
                              storage_profile=storage_profile_name,
                              ssh_key_filepath=ssh_key_filepath)
                except Exception as e:
                    raise NFSNodeCreationError("Error creating NFS node:",
                                               str(e))

            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Created cluster {cluster_name} ({cluster_id})")
        except (MasterNodeCreationError, WorkerNodeCreationError,
                NFSNodeCreationError, ClusterJoiningError,
                ClusterInitializationError, ClusterOperationError) as e:
            if rollback:
                msg = f"Error creating cluster {cluster_name}. " \
                      f"Deleting cluster (rollback=True)"
                self._update_task(TaskStatus.RUNNING, message=msg)
                LOGGER.info(msg)
                # rollback is best effort: a failed delete is only logged
                try:
                    cluster = get_cluster(self.tenant_client,
                                          cluster_name,
                                          cluster_id=cluster_id,
                                          org_name=org_name,
                                          ovdc_name=ovdc_name)
                    self._delete_cluster(cluster_name=cluster_name,
                                         cluster_vdc_href=cluster['vdc_href'])
                except Exception:
                    LOGGER.error(f"Failed to delete cluster {cluster_name}",
                                 exc_info=True)
            LOGGER.error(f"Error creating cluster {cluster_name}",
                         exc_info=True)
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
            # raising an exception here prints a stacktrace to server console
        except Exception as e:
            LOGGER.error(f"Unknown error creating cluster {cluster_name}",
                         exc_info=True)
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
        finally:
            self.logout_sys_admin_client()

    @run_async
    def _create_nodes_async(self, *args,
                            cluster_name, cluster_vdc_href, cluster_vapp_href,
                            cluster_id, template_name, template_revision,
                            num_workers, network_name, num_cpu, mb_memory,
                            storage_profile_name, ssh_key_filepath, enable_nfs,
                            rollback):
        """Add nodes to an existing cluster and report progress on the task.

        Worker nodes are created and then joined to the cluster. When
        `enable_nfs` is true the new nodes are created as NFS nodes instead
        and are not joined.

        Any failure, including a failed org/vdc/vApp/template lookup, marks
        the task as ERROR. On a NodeCreationError the nodes named by the
        error are deleted first when `rollback` is true. The sys admin
        client is always logged out at the end.

        All parameters following '*args' are required and keyword-only.

        :param str cluster_name: name of the cluster, used in messages.
        :param str cluster_vdc_href: href of the vdc that holds the cluster.
        :param str cluster_vapp_href: href of the cluster vApp.
        :param str cluster_id: id of the cluster, used in messages.
        :param str template_name: name of the template to resolve.
        :param str template_revision: revision of the template to resolve.
        :param int num_workers: number of nodes to add.
        :param str network_name: passed through to `add_nodes`.
        :param num_cpu: passed through to `add_nodes`.
        :param mb_memory: passed through to `add_nodes` as `memory_in_mb`.
        :param storage_profile_name: passed through to `add_nodes`.
        :param ssh_key_filepath: passed through to `add_nodes`.
        :param bool enable_nfs: create NFS nodes instead of worker nodes.
        :param bool rollback: whether nodes are deleted again after a
            NodeCreationError.
        """
        msg = f"Creating {num_workers} node(s) from template " \
              f"'{template_name}' (revision {template_revision}) and " \
              f"adding to {cluster_name} ({cluster_id})"
        LOGGER.debug(msg)
        try:
            # The lookups live inside the try block so that a failure here
            # still marks the task as ERROR and logs out the sys admin client
            org = vcd_utils.get_org(self.tenant_client)
            vdc = VDC(self.tenant_client, href=cluster_vdc_href)
            vapp = VApp(self.tenant_client, href=cluster_vapp_href)
            template = get_template(name=template_name,
                                    revision=template_revision)

            self._update_task(TaskStatus.RUNNING, message=msg)

            node_type = NodeType.WORKER
            if enable_nfs:
                node_type = NodeType.NFS

            server_config = utils.get_server_runtime_config()
            catalog_name = server_config['broker']['catalog']

            new_nodes = add_nodes(client=self.tenant_client,
                                  num_nodes=num_workers,
                                  node_type=node_type,
                                  org=org,
                                  vdc=vdc,
                                  vapp=vapp,
                                  catalog_name=catalog_name,
                                  template=template,
                                  network_name=network_name,
                                  num_cpu=num_cpu,
                                  memory_in_mb=mb_memory,
                                  storage_profile=storage_profile_name,
                                  ssh_key_filepath=ssh_key_filepath)

            if node_type == NodeType.NFS:
                # NFS nodes are not joined to the cluster
                self._update_task(
                    TaskStatus.SUCCESS,
                    message=f"Created {num_workers} node(s) for "
                            f"{cluster_name}({cluster_id})")
            elif node_type == NodeType.WORKER:
                self._update_task(
                    TaskStatus.RUNNING,
                    message=f"Adding {num_workers} node(s) to cluster "
                            f"{cluster_name}({cluster_id})")
                # only the freshly created VMs are joined to the cluster
                target_nodes = [spec['target_vm_name']
                                for spec in new_nodes['specs']]
                vapp.reload()
                join_cluster(vapp, template[LocalTemplateKey.NAME],
                             template[LocalTemplateKey.REVISION], target_nodes)
                self._update_task(
                    TaskStatus.SUCCESS,
                    message=f"Added {num_workers} node(s) to cluster "
                            f"{cluster_name}({cluster_id})")
        except NodeCreationError as e:
            if rollback:
                msg = f"Error adding nodes to {cluster_name} {cluster_id}." \
                      f" Deleting nodes: {e.node_names} (rollback=True)"
                self._update_task(TaskStatus.RUNNING, message=msg)
                LOGGER.info(msg)
                # rollback is best effort: a failed delete is only logged
                try:
                    self._delete_nodes(cluster_name=cluster_name,
                                       cluster_vapp_href=cluster_vapp_href,
                                       node_names_list=e.node_names)
                except Exception:
                    LOGGER.error(f"Failed to delete nodes {e.node_names} "
                                 f"from cluster {cluster_name}",
                                 exc_info=True)
            LOGGER.error(f"Error adding nodes to {cluster_name}",
                         exc_info=True)
            error_obj = error_to_json(e)
            LOGGER.error(str(e), exc_info=True)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
            # raising an exception here prints a stacktrace to server console
        except Exception as e:
            error_obj = error_to_json(e)
            LOGGER.error(str(e), exc_info=True)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
        finally:
            self.logout_sys_admin_client()

    # all parameters following '*args' are required and keyword-only
    @run_async
    def _delete_nodes_async(self, *args,
                            cluster_name, cluster_vapp_href, node_names_list):
        """Delete nodes from a cluster and report progress on the task.

        The task is set to RUNNING, the nodes are removed through
        `_delete_nodes`, and the task is then set to SUCCESS. Any exception
        is logged and recorded on the task as ERROR together with its stack
        trace. The sys admin client is always logged out at the end.

        :param str cluster_name: name of the cluster, used in messages.
        :param str cluster_vapp_href: href of the cluster vApp.
        :param list node_names_list: names of the nodes to delete.
        """
        try:
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Deleting {len(node_names_list)} node(s) "
                        f"from cluster {cluster_name}")
            self._delete_nodes(cluster_name=cluster_name,
                               cluster_vapp_href=cluster_vapp_href,
                               node_names_list=node_names_list)
            # nodes are removed *from* the cluster; the message used to say
            # "to cluster"
            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Deleted {len(node_names_list)} node(s)"
                        f" from cluster {cluster_name}")
        except Exception as e:
            LOGGER.error(f"Unexpected error while deleting nodes "
                         f"{node_names_list}: {e}",
                         exc_info=True)
            error_obj = error_to_json(e)
            stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) # noqa: E501
            self._update_task(
                TaskStatus.ERROR,
                error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501
                stack_trace=stack_trace)
        finally:
            self.logout_sys_admin_client()

    # all parameters following '*args' are required and keyword-only
    @run_async
    def _delete_cluster_async(self, *args, cluster_name, cluster_vdc_href):
        """Delete a cluster and report progress on the task.

        The task is set to RUNNING, the cluster is removed through
        `_delete_cluster`, and the task is then set to SUCCESS. Any exception
        is logged and recorded on the task as ERROR with the exception text
        as error message. The sys admin client is always logged out at the
        end.

        :param str cluster_name: name of the cluster to delete.
        :param str cluster_vdc_href: href of the vdc that holds the cluster.
        """
        try:
            started_msg = f"Deleting cluster {cluster_name}"
            self._update_task(TaskStatus.RUNNING, message=started_msg)

            self._delete_cluster(cluster_vdc_href=cluster_vdc_href,
                                 cluster_name=cluster_name)

            finished_msg = f"Deleted cluster {cluster_name}"
            self._update_task(TaskStatus.SUCCESS, message=finished_msg)
        except Exception as err:
            LOGGER.error(f"Unexpected error while deleting cluster: {err}",
                         exc_info=True)
            failure_text = str(err)
            self._update_task(TaskStatus.ERROR, error_message=failure_text)
        finally:
            self.logout_sys_admin_client()

    # all parameters following '*args' are required and keyword-only
    # synchronous cluster/node delete functions are required for rollback
    def _delete_cluster(self, *args, cluster_name, cluster_vdc_href):
        """Force-delete the cluster vApp and wait for the delete task.

        :param str cluster_name: name of the vApp to delete.
        :param str cluster_vdc_href: href of the vdc that holds the vApp.
        """
        LOGGER.debug(f"About to delete cluster with name: {cluster_name}")
        owning_vdc = VDC(self.tenant_client, href=cluster_vdc_href)
        delete_task = owning_vdc.delete_vapp(cluster_name, force=True)
        monitor = self.tenant_client.get_task_monitor()
        monitor.wait_for_status(delete_task)

    # all parameters following '*args' are required and keyword-only
    def _delete_nodes(self, *args,
                      cluster_name, cluster_vapp_href, node_names_list):
        """Remove nodes from the cluster, then delete their VMs.

        Failures while removing the nodes from the cluster or while
        undeploying a VM are only logged; the VMs are deleted from the vApp
        regardless, and that final delete task is waited for.

        :param str cluster_name: name of the cluster, used in messages.
        :param str cluster_vapp_href: href of the cluster vApp.
        :param list node_names_list: names of the nodes to delete.
        """
        LOGGER.debug(f"About to delete nodes {node_names_list} "
                     f"from cluster {cluster_name}")
        cluster_vapp = VApp(self.tenant_client, href=cluster_vapp_href)

        # best effort: keep going even if the nodes cannot be removed from
        # the cluster
        try:
            delete_nodes_from_cluster(cluster_vapp, node_names_list)
        except Exception:
            LOGGER.error(f"Couldn't delete node {node_names_list} "
                         f"from cluster:{cluster_name}")

        for vm_name in node_names_list:
            vm_resource = cluster_vapp.get_vm(vm_name)
            node_vm = VM(self.tenant_client, resource=vm_resource)
            # best effort: a VM that cannot be undeployed is still deleted
            try:
                undeploy_task = node_vm.undeploy()
                monitor = self.tenant_client.get_task_monitor()
                monitor.wait_for_status(undeploy_task)
            except Exception:
                LOGGER.warning(f"Couldn't undeploy VM {vm_name}")

        delete_task = cluster_vapp.delete_vms(node_names_list)
        monitor = self.tenant_client.get_task_monitor()
        monitor.wait_for_status(delete_task)

    def _update_task(self, status, message='', error_message=None,
                     stack_trace=''):
        """Create the task on first use, otherwise update the existing one.

        This function should only be used in the x_async functions, or in the
        6 common broker functions to create the required task.

        The task is driven through the sys admin client, which is logged in
        on demand but never logged out here: many _update_task() calls happen
        in sequence until the task succeeds or fails, so logging out is left
        to the caller once the task reaches a success or failure state.
        Keeping logout separate also lets callers put it in a finally clause
        that covers unknown errors during an operation.

        :param status: task status; its `.value` is sent to vCD.
        :param str message: text sent as the task operation.
        :param str error_message: error text recorded on the task.
        :param str stack_trace: stack trace recorded on the task; replaced
            by an empty string unless the tenant client is a sys admin.
        """
        # stack traces are only passed on for sys admin tenants
        if not self.tenant_client.is_sysadmin():
            stack_trace = ''

        if self.task is None:
            self.task = Task(self.sys_admin_client)

        # reuse the href of the current task so that it is updated in place
        current = self.task_resource
        task_href = current.get('href') if current is not None else None

        tenant_org = vcd_utils.get_org(self.tenant_client)
        user_name = self.client_session.get('user')
        user_href = tenant_org.get_user(user_name).get('href')

        task_fields = {
            'status': status.value,
            'namespace': 'vcloud.cse',
            'operation': message,
            'operation_name': 'cluster operation',
            'details': '',
            'progress': None,
            'owner_href': self.tenant_org_href,
            'owner_name': self.tenant_org_name,
            'owner_type': 'application/vnd.vmware.vcloud.org+xml',
            'user_href': user_href,
            'user_name': self.tenant_user_name,
            'org_href': self.tenant_org_href,
            'task_href': task_href,
            'error_message': error_message,
            'stack_trace': stack_trace,
        }
        self.task_resource = self.task.update(**task_fields)

    def _get_nfs_exports(self, ip, vapp, vm_name):
        """Get the exports from remote NFS server (helper method).

        Runs `showmount -e <ip>` on the node and returns the first
        whitespace-separated field of every non-blank output line after the
        first line.

        :param ip: (str): IP address of the NFS server
        :param vapp: (pyvcloud.vcd.vapp.VApp): The vApp or cluster
         to which node belongs
        :param vm_name: name of node's VM

        :return: (List): List of exports
        """
        script = f"#!/usr/bin/env bash\nshowmount -e {ip}"
        result = execute_script_in_nodes(vapp=vapp, node_names=[vm_name],
                                         script=script, check_tools=False)
        output = result[0][1].content.decode()

        exports = []
        # The first line is skipped (showmount prints a header line before
        # the export list). Blank lines are ignored rather than assumed to
        # be a single trailing one, so output without a final newline keeps
        # its last export and a stray empty line cannot raise IndexError.
        for line in output.split('\n')[1:]:
            fields = line.split()
            if fields:
                exports.append(fields[0])
        return exports