def delete_cluster(self, data): """Start the delete cluster operation. Common broker function that validates data for 'delete cluster' operation. Deleting nodes is an asynchronous task, so the returned `result['task_href']` can be polled to get updates on task progress. Required data: cluster_name Optional data and default values: org_name=None, ovdc_name=None """ required = [RequestKey.CLUSTER_NAME] req_utils.validate_payload(data, required) defaults = {RequestKey.ORG_NAME: None, RequestKey.OVDC_NAME: None} validated_data = {**defaults, **data} cluster_name = validated_data[RequestKey.CLUSTER_NAME] cluster = get_cluster(self.tenant_client, cluster_name, org_name=validated_data[RequestKey.ORG_NAME], ovdc_name=validated_data[RequestKey.OVDC_NAME]) cluster_id = cluster['cluster_id'] # must _update_task here or else self.task_resource is None # do not logout of sys admin, or else in pyvcloud's session.request() # call, session becomes None self._update_task( TaskStatus.RUNNING, message=f"Deleting cluster {cluster_name} ({cluster_id})") self._delete_cluster_async(cluster_name=cluster_name, cluster_vdc_href=cluster['vdc_href']) return { 'cluster_name': cluster_name, 'task_href': self.task_resource.get('href') }
def get_node_info(self, data): """Get node metadata as dictionary. Required data: cluster_name, node_name Optional data and default values: org_name=None, ovdc_name=None """ required = [ RequestKey.CLUSTER_NAME, RequestKey.NODE_NAME ] utils.ensure_keys_in_dict(required, data, dict_name='data') defaults = { RequestKey.ORG_NAME: None, RequestKey.OVDC_NAME: None } validated_data = {**defaults, **data} cluster_name = validated_data[RequestKey.CLUSTER_NAME] node_name = validated_data[RequestKey.NODE_NAME] cluster = get_cluster(self.tenant_client, cluster_name, org_name=validated_data[RequestKey.ORG_NAME], ovdc_name=validated_data[RequestKey.OVDC_NAME]) vapp = VApp(self.tenant_client, href=cluster['vapp_href']) vms = vapp.get_all_vms() node_info = None for vm in vms: vm_name = vm.get('name') if node_name != vm_name: continue node_info = { 'name': vm_name, 'numberOfCpus': '', 'memoryMB': '', 'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))), 'ipAddress': '' } if hasattr(vm, 'VmSpecSection'): node_info['numberOfCpus'] = vm.VmSpecSection.NumCpus.text node_info['memoryMB'] = vm.VmSpecSection.MemoryResourceMb.Configured.text # noqa: E501 try: node_info['ipAddress'] = vapp.get_primary_ip(vm_name) except Exception: LOGGER.debug(f"Unable to get ip address of node {vm_name}") if vm_name.startswith(NodeType.MASTER): node_info['node_type'] = 'master' elif vm_name.startswith(NodeType.WORKER): node_info['node_type'] = 'worker' elif vm_name.startswith(NodeType.NFS): node_info['node_type'] = 'nfs' node_info['exports'] = self._get_nfs_exports(node_info['ipAddress'], vapp, vm_name) # noqa: E501 if node_info is None: raise NodeNotFoundError(f"Node '{node_name}' not found in " f"cluster '{cluster_name}'") return node_info
def delete_nodes(self, data): """Start the delete nodes operation. Validates data for the 'delete nodes' operation. Deleting nodes is an asynchronous task, so the returned `result['task_href']` can be polled to get updates on task progress. Required data: cluster_name, node_names_list Optional data and default values: org_name=None, ovdc_name=None """ required = [ RequestKey.CLUSTER_NAME, RequestKey.NODE_NAMES_LIST ] utils.ensure_keys_in_dict(required, data, dict_name='data') defaults = { RequestKey.ORG_NAME: None, RequestKey.OVDC_NAME: None } validated_data = {**defaults, **data} cluster_name = validated_data[RequestKey.CLUSTER_NAME] node_names_list = validated_data[RequestKey.NODE_NAMES_LIST] # check that there are nodes to delete if len(node_names_list) == 0: LOGGER.debug("No nodes specified to delete") return {'body': {}} # check that master node is not in specified nodes for node in node_names_list: if node.startswith(NodeType.MASTER): raise CseServerError(f"Can't delete a master node: '{node}'.") cluster = get_cluster(self.tenant_client, cluster_name, org_name=validated_data[RequestKey.ORG_NAME], ovdc_name=validated_data[RequestKey.OVDC_NAME]) cluster_id = cluster['cluster_id'] # must _update_task here or else self.task_resource is None # do not logout of sys admin, or else in pyvcloud's session.request() # call, session becomes None self._update_task( TaskStatus.RUNNING, message=f"Deleting {len(node_names_list)} node(s)" f" from cluster {cluster_name}({cluster_id})") self._delete_nodes_async( cluster_name=cluster_name, cluster_vapp_href=cluster['vapp_href'], node_names_list=validated_data[RequestKey.NODE_NAMES_LIST]) return { 'cluster_name': cluster_name, 'task_href': self.task_resource.get('href') }
def get_cluster_config(self, data): """Get the cluster's kube config contents. Common broker function that validates data for 'cluster config' operation and returns the cluster's kube config file contents as a string. Required data: cluster_name Optional data and default values: org_name=None, ovdc_name=None """ required = [ RequestKey.CLUSTER_NAME ] utils.ensure_keys_in_dict(required, data, dict_name='data') defaults = { RequestKey.ORG_NAME: None, RequestKey.OVDC_NAME: None } validated_data = {**defaults, **data} cluster_name = validated_data[RequestKey.CLUSTER_NAME] cluster = get_cluster(self.tenant_client, cluster_name, org_name=validated_data[RequestKey.ORG_NAME], ovdc_name=validated_data[RequestKey.OVDC_NAME]) vapp = VApp(self.tenant_client, href=cluster['vapp_href']) node_names = get_node_names(vapp, NodeType.MASTER) all_results = [] try: for node_name in node_names: LOGGER.debug(f"getting file from node {node_name}") password = vapp.get_admin_password(node_name) vs = vs_utils.get_vsphere(self.sys_admin_client, vapp, vm_name=node_name, logger=LOGGER) vs.connect() moid = vapp.get_vm_moid(node_name) vm = vs.get_vm_by_moid(moid) filename = '/root/.kube/config' result = vs.download_file_from_guest(vm, 'root', password, filename) all_results.append(result) finally: self.logout_sys_admin_client() if len(all_results) == 0 or all_results[0].status_code != requests.codes.ok: # noqa: E501 raise ClusterOperationError("Couldn't get cluster configuration") return all_results[0].content.decode()
def get_cluster_info(self, data): """Get cluster metadata as well as node data. Common broker function that validates data for the 'cluster info' operation and returns cluster/node metadata as dictionary. Required data: cluster_name Optional data and default values: org_name=None, ovdc_name=None """ required = [ RequestKey.CLUSTER_NAME ] utils.ensure_keys_in_dict(required, data, dict_name='data') defaults = { RequestKey.ORG_NAME: None, RequestKey.OVDC_NAME: None } validated_data = {**defaults, **data} cluster_name = validated_data[RequestKey.CLUSTER_NAME] cluster = get_cluster(self.tenant_client, cluster_name, org_name=validated_data[RequestKey.ORG_NAME], ovdc_name=validated_data[RequestKey.OVDC_NAME]) cluster[K8S_PROVIDER_KEY] = K8sProvider.NATIVE vapp = VApp(self.tenant_client, href=cluster['vapp_href']) vms = vapp.get_all_vms() for vm in vms: node_info = { 'name': vm.get('name'), 'ipAddress': '' } try: node_info['ipAddress'] = vapp.get_primary_ip(vm.get('name')) except Exception: LOGGER.debug(f"Unable to get ip address of node " f"{vm.get('name')}") if vm.get('name').startswith(NodeType.MASTER): cluster.get('master_nodes').append(node_info) elif vm.get('name').startswith(NodeType.WORKER): cluster.get('nodes').append(node_info) elif vm.get('name').startswith(NodeType.NFS): cluster.get('nfs_nodes').append(node_info) return cluster
def _create_cluster_async(self, *args, org_name, ovdc_name, cluster_name, cluster_id, template_name, template_revision, num_workers, network_name, num_cpu, mb_memory, storage_profile_name, ssh_key_filepath, enable_nfs, rollback): org = vcd_utils.get_org(self.tenant_client, org_name=org_name) vdc = vcd_utils.get_vdc( self.tenant_client, vdc_name=ovdc_name, org=org) LOGGER.debug(f"About to create cluster {cluster_name} on {ovdc_name}" f" with {num_workers} worker nodes, " f"storage profile={storage_profile_name}") try: self._update_task( TaskStatus.RUNNING, message=f"Creating cluster vApp {cluster_name}({cluster_id})") try: vapp_resource = \ vdc.create_vapp(cluster_name, description=f"cluster {cluster_name}", network=network_name, fence_mode='bridged') except Exception as e: msg = f"Error while creating vApp: {e}" LOGGER.debug(str(e)) raise ClusterOperationError(msg) self.tenant_client.get_task_monitor().wait_for_status(vapp_resource.Tasks.Task[0]) # noqa: E501 template = get_template(template_name, template_revision) tags = { ClusterMetadataKey.CLUSTER_ID: cluster_id, ClusterMetadataKey.CSE_VERSION: pkg_resources.require('container-service-extension')[0].version, # noqa: E501 ClusterMetadataKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME], # noqa: E501 ClusterMetadataKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION] # noqa: E501 } vapp = VApp(self.tenant_client, href=vapp_resource.get('href')) task = vapp.set_multiple_metadata(tags) self.tenant_client.get_task_monitor().wait_for_status(task) self._update_task( TaskStatus.RUNNING, message=f"Creating master node for " f"{cluster_name} ({cluster_id})") vapp.reload() server_config = utils.get_server_runtime_config() catalog_name = server_config['broker']['catalog'] try: add_nodes(client=self.tenant_client, num_nodes=1, node_type=NodeType.MASTER, org=org, vdc=vdc, vapp=vapp, catalog_name=catalog_name, template=template, network_name=network_name, num_cpu=num_cpu, memory_in_mb=mb_memory, storage_profile=storage_profile_name, ssh_key_filepath=ssh_key_filepath) except Exception as e: raise MasterNodeCreationError("Error adding master node:", str(e)) self._update_task( TaskStatus.RUNNING, message=f"Initializing cluster {cluster_name} ({cluster_id})") vapp.reload() init_cluster(vapp, template[LocalTemplateKey.NAME], template[LocalTemplateKey.REVISION]) master_ip = get_master_ip(vapp) task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip', master_ip) self.tenant_client.get_task_monitor().wait_for_status(task) self._update_task( TaskStatus.RUNNING, message=f"Creating {num_workers} node(s) for " f"{cluster_name}({cluster_id})") try: add_nodes(client=self.tenant_client, num_nodes=num_workers, node_type=NodeType.WORKER, org=org, vdc=vdc, vapp=vapp, catalog_name=catalog_name, template=template, network_name=network_name, num_cpu=num_cpu, memory_in_mb=mb_memory, storage_profile=storage_profile_name, ssh_key_filepath=ssh_key_filepath) except Exception as e: raise WorkerNodeCreationError("Error creating worker node:", str(e)) self._update_task( TaskStatus.RUNNING, message=f"Adding {num_workers} node(s) to " f"{cluster_name}({cluster_id})") vapp.reload() join_cluster(vapp, template[LocalTemplateKey.NAME], template[LocalTemplateKey.REVISION]) if enable_nfs: self._update_task( TaskStatus.RUNNING, message=f"Creating NFS node for " f"{cluster_name} ({cluster_id})") try: add_nodes(client=self.tenant_client, num_nodes=1, node_type=NodeType.NFS, org=org, vdc=vdc, vapp=vapp, catalog_name=catalog_name, template=template, network_name=network_name, num_cpu=num_cpu, memory_in_mb=mb_memory, storage_profile=storage_profile_name, ssh_key_filepath=ssh_key_filepath) except Exception as e: raise NFSNodeCreationError("Error creating NFS node:", str(e)) self._update_task( TaskStatus.SUCCESS, message=f"Created cluster {cluster_name} ({cluster_id})") except (MasterNodeCreationError, WorkerNodeCreationError, NFSNodeCreationError, ClusterJoiningError, ClusterInitializationError, ClusterOperationError) as e: if rollback: msg = f"Error creating cluster {cluster_name}. " \ f"Deleting cluster (rollback=True)" self._update_task(TaskStatus.RUNNING, message=msg) LOGGER.info(msg) try: cluster = get_cluster(self.tenant_client, cluster_name, cluster_id=cluster_id, org_name=org_name, ovdc_name=ovdc_name) self._delete_cluster(cluster_name=cluster_name, cluster_vdc_href=cluster['vdc_href']) except Exception: LOGGER.error(f"Failed to delete cluster {cluster_name}", exc_info=True) LOGGER.error(f"Error creating cluster {cluster_name}", exc_info=True) error_obj = error_to_json(e) stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) # noqa: E501 self._update_task( TaskStatus.ERROR, error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501 stack_trace=stack_trace) # raising an exception here prints a stacktrace to server console except Exception as e: LOGGER.error(f"Unknown error creating cluster {cluster_name}", exc_info=True) error_obj = error_to_json(e) stack_trace = ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY]) # noqa: E501 self._update_task( TaskStatus.ERROR, error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY], # noqa: E501 stack_trace=stack_trace) finally: self.logout_sys_admin_client()
def create_nodes(self, data): """Start the create nodes operation. Validates data for 'node create' operation. Creating nodes is an asynchronous task, so the returned `result['task_href']` can be polled to get updates on task progress. Required data: cluster_name, network_name Optional data and default values: num_nodes=2, num_cpu=None, mb_memory=None, storage_profile_name=None, ssh_key_filepath=None, template_name=default, template_revision=default, enable_nfs=False, rollback=True """ required = [ RequestKey.CLUSTER_NAME, RequestKey.NETWORK_NAME ] utils.ensure_keys_in_dict(required, data, dict_name='data') cluster_name = data[RequestKey.CLUSTER_NAME] # check that requested/default template is valid template = get_template( name=data.get(RequestKey.TEMPLATE_NAME), revision=data.get(RequestKey.TEMPLATE_REVISION)) defaults = { RequestKey.ORG_NAME: None, RequestKey.OVDC_NAME: None, RequestKey.NUM_WORKERS: 1, RequestKey.NUM_CPU: None, RequestKey.MB_MEMORY: None, RequestKey.STORAGE_PROFILE_NAME: None, RequestKey.SSH_KEY_FILEPATH: None, RequestKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME], RequestKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION], RequestKey.ENABLE_NFS: False, RequestKey.ROLLBACK: True, } validated_data = {**defaults, **data} # TODO HACK default dictionary combining needs to be fixed validated_data[RequestKey.TEMPLATE_NAME] = validated_data[RequestKey.TEMPLATE_NAME] or template[LocalTemplateKey.NAME] # noqa: E501 validated_data[RequestKey.TEMPLATE_REVISION] = validated_data[RequestKey.TEMPLATE_REVISION] or template[LocalTemplateKey.REVISION] # noqa: E501 template_name = validated_data[RequestKey.TEMPLATE_NAME] template_revision = validated_data[RequestKey.TEMPLATE_REVISION] num_workers = validated_data[RequestKey.NUM_WORKERS] if num_workers < 1: raise CseServerError(f"Worker node count must be > 0 " f"(received {num_workers}).") cluster = get_cluster(self.tenant_client, cluster_name, org_name=validated_data[RequestKey.ORG_NAME], ovdc_name=validated_data[RequestKey.OVDC_NAME]) cluster_id = cluster['cluster_id'] # must _update_task here or else self.task_resource is None # do not logout of sys admin, or else in pyvcloud's session.request() # call, session becomes None self._update_task( TaskStatus.RUNNING, message=f"Creating {num_workers} node(s) from template " f"'{template_name}' (revision {template_revision}) and " f"adding to {cluster_name} ({cluster_id})") self._create_nodes_async( cluster_name=cluster_name, cluster_vdc_href=cluster['vdc_href'], cluster_vapp_href=cluster['vapp_href'], cluster_id=cluster_id, template_name=template_name, template_revision=template_revision, num_workers=validated_data[RequestKey.NUM_WORKERS], network_name=validated_data[RequestKey.NETWORK_NAME], num_cpu=validated_data[RequestKey.NUM_CPU], mb_memory=validated_data[RequestKey.MB_MEMORY], storage_profile_name=validated_data[RequestKey.STORAGE_PROFILE_NAME], # noqa: E501 ssh_key_filepath=validated_data[RequestKey.SSH_KEY_FILEPATH], enable_nfs=validated_data[RequestKey.ENABLE_NFS], rollback=validated_data[RequestKey.ROLLBACK]) return { 'cluster_name': cluster_name, 'task_href': self.task_resource.get('href') }
def create_cluster(self, data): """Start the cluster creation operation. Common broker function that validates data for the 'create cluster' operation and returns a dictionary with cluster detail and task information. Calls the asyncronous cluster create function that actually performs the work. The returned `result['task_href']` can be polled to get updates on task progress. Required data: cluster_name, org_name, ovdc_name, network_name Optional data and default values: num_nodes=2, num_cpu=None, mb_memory=None, storage_profile_name=None, ssh_key_filepath=None, template_name=default, template_revision=default, enable_nfs=False, rollback=True """ required = [ RequestKey.CLUSTER_NAME, RequestKey.ORG_NAME, RequestKey.OVDC_NAME, RequestKey.NETWORK_NAME ] utils.ensure_keys_in_dict(required, data, dict_name='data') cluster_name = data[RequestKey.CLUSTER_NAME] # check that cluster name is syntactically valid if not is_valid_cluster_name(cluster_name): raise CseServerError(f"Invalid cluster name '{cluster_name}'") # check that cluster name doesn't already exist try: get_cluster(self.tenant_client, cluster_name, org_name=data[RequestKey.ORG_NAME], ovdc_name=data[RequestKey.OVDC_NAME]) raise ClusterAlreadyExistsError(f"Cluster {cluster_name} " f"already exists.") except ClusterNotFoundError: pass # check that requested/default template is valid template = get_template( name=data.get(RequestKey.TEMPLATE_NAME), revision=data.get(RequestKey.TEMPLATE_REVISION)) defaults = { RequestKey.NUM_WORKERS: 2, RequestKey.NUM_CPU: None, RequestKey.MB_MEMORY: None, RequestKey.STORAGE_PROFILE_NAME: None, RequestKey.SSH_KEY_FILEPATH: None, RequestKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME], RequestKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION], RequestKey.ENABLE_NFS: False, RequestKey.ROLLBACK: True, } validated_data = {**defaults, **data} # TODO HACK default dictionary combining needs to be fixed validated_data[RequestKey.TEMPLATE_NAME] = validated_data[RequestKey.TEMPLATE_NAME] or template[LocalTemplateKey.NAME] # noqa: E501 validated_data[RequestKey.TEMPLATE_REVISION] = validated_data[RequestKey.TEMPLATE_REVISION] or template[LocalTemplateKey.REVISION] # noqa: E501 template_name = validated_data[RequestKey.TEMPLATE_NAME] template_revision = validated_data[RequestKey.TEMPLATE_REVISION] # check that requested number of worker nodes is at least more than 1 num_workers = validated_data[RequestKey.NUM_WORKERS] if num_workers < 1: raise CseServerError(f"Worker node count must be > 0 " f"(received {num_workers}).") cluster_id = str(uuid.uuid4()) # must _update_task or else self.task_resource is None # do not logout of sys admin, or else in pyvcloud's session.request() # call, session becomes None self._update_task( TaskStatus.RUNNING, message=f"Creating cluster vApp '{cluster_name}' ({cluster_id})" f" from template '{template_name}' " f"(revision {template_revision})") self._create_cluster_async( org_name=validated_data[RequestKey.ORG_NAME], ovdc_name=validated_data[RequestKey.OVDC_NAME], cluster_name=cluster_name, cluster_id=cluster_id, template_name=template_name, template_revision=template_revision, num_workers=validated_data[RequestKey.NUM_WORKERS], network_name=validated_data[RequestKey.NETWORK_NAME], num_cpu=validated_data[RequestKey.NUM_CPU], mb_memory=validated_data[RequestKey.MB_MEMORY], storage_profile_name=validated_data[RequestKey.STORAGE_PROFILE_NAME], # noqa: E501 ssh_key_filepath=validated_data[RequestKey.SSH_KEY_FILEPATH], enable_nfs=validated_data[RequestKey.ENABLE_NFS], rollback=validated_data[RequestKey.ROLLBACK]) return { 'name': cluster_name, 'cluster_id': cluster_id, 'task_href': self.task_resource.get('href') }