def consumer_thread(c):
    """Drive a consumer's message loop; on any failure, log and stop it.

    :param c: consumer object exposing run() and stop().
    """
    try:
        LOGGER.info(f"About to start consumer_thread {c}.")
        c.run()
    except Exception:
        # Surface the failure on the console and in the log, then make
        # sure the consumer is shut down cleanly.
        click.echo("About to stop consumer_thread.")
        LOGGER.error(traceback.format_exc())
        c.stop()
def get_node_info(self, cluster_name, node_name):
    """Get the info of a given node in the cluster.

    :param cluster_name: (str): Name of the cluster
    :param node_name: (str): Name of the node

    :return: (dict): Info of the node.

    :raises CseDuplicateClusterError: if more than one cluster matches.
    :raises ClusterNotFoundError: if no cluster matches.
    :raises NodeNotFoundError: if the node is not in the cluster's vApp.
    """
    self._connect_tenant()
    # Cluster lookup is metadata-driven; org/vdc narrow the search.
    clusters = load_from_metadata(
        self.tenant_client, name=cluster_name,
        org_name=self.req_spec.get(RequestKey.ORG_NAME),
        vdc_name=self.req_spec.get(RequestKey.OVDC_NAME))
    if len(clusters) > 1:
        raise CseDuplicateClusterError(f"Multiple clusters of name"
                                       f" '{cluster_name}' detected.")
    if len(clusters) == 0:
        raise ClusterNotFoundError(f"Cluster '{cluster_name}' not found.")

    vapp = VApp(self.tenant_client, href=clusters[0]['vapp_href'])
    vms = vapp.get_all_vms()
    node_info = None
    for vm in vms:
        # Scan every VM in the vApp; the matching VM (last match if
        # names are duplicated) populates node_info.
        if (node_name == vm.get('name')):
            node_info = {
                'name': vm.get('name'),
                'numberOfCpus': '',
                'memoryMB': '',
                'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
                'ipAddress': ''
            }
            # VmSpecSection may be absent on some VM representations.
            if hasattr(vm, 'VmSpecSection'):
                node_info['numberOfCpus'] = vm.VmSpecSection.NumCpus.text
                node_info['memoryMB'] = \
                    vm.VmSpecSection.MemoryResourceMb.Configured.text
            # Best-effort: the ip stays '' if it cannot be resolved.
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(
                    vm.get('name'))
            except Exception:
                LOGGER.debug(f"Unable to get ip address of node "
                             f"{vm.get('name')}")
            # Node role is encoded in the VM name prefix.
            if vm.get('name').startswith(NodeType.MASTER):
                node_info['node_type'] = 'master'
            elif vm.get('name').startswith(NodeType.WORKER):
                node_info['node_type'] = 'worker'
            elif vm.get('name').startswith(NodeType.NFS):
                node_info['node_type'] = 'nfs'
                # NFS nodes additionally report their exports.
                exports = self._get_nfs_exports(node_info['ipAddress'],
                                                vapp, vm)
                node_info['exports'] = exports
    if node_info is None:
        raise NodeNotFoundError(f"Node '{node_name}' not found in "
                                f"cluster '{cluster_name}'")
    return node_info
def setup_exchange(self, exchange_name):
    """Declare the AMQP exchange this consumer uses.

    :param str exchange_name: name of the exchange to declare.
    """
    LOGGER.debug(f"Declaring exchange {exchange_name}")
    # NOTE(review): first positional argument is the completion callback
    # (pre-1.0 pika API). With passive=True the declare only checks that
    # the exchange exists; durable/auto_delete are then not applied by
    # the broker — confirm this is the intended behavior.
    self._channel.exchange_declare(
        self.on_exchange_declareok,
        exchange=exchange_name,
        exchange_type=EXCHANGE_TYPE,
        passive=True,
        durable=True,
        auto_delete=False)
def on_connection_closed(self, connection, reply_code, reply_text):
    """pika callback: the broker connection dropped.

    Stops the ioloop during an intentional shutdown; otherwise schedules
    a reconnect attempt.
    """
    self._channel = None
    if not self._closing:
        LOGGER.warning(
            'Connection closed, reopening in 5 seconds: '
            '(%s) %s', reply_code, reply_text)
        self._connection.add_timeout(5, self.reconnect)
    else:
        self._connection.ioloop.stop()
def exception_handler_wrapper(*args, **kwargs):
    """Invoke the wrapped function, converting any raised exception into
    an internal-server-error response dict."""
    try:
        return func(*args, **kwargs)
    except Exception as err:
        payload = {
            'status_code': INTERNAL_SERVER_ERROR,
            'body': error_to_json(err),
        }
        LOGGER.error(traceback.format_exc())
        return payload
def send_too_many_requests_response(self, properties, body):
    """Reply to an AMQP request with a 'too many requests' response.

    No-op when the sender did not provide a reply-to queue.
    """
    if properties.reply_to is None:
        return
    request_id = utils.str_to_json(body, self.fsencoding)[0]['id']
    reply_msg = self.form_response_json(
        request_id=request_id,
        status_code=requests.codes.too_many_requests,
        reply_body_str=constants.TOO_MANY_REQUESTS_BODY)
    LOGGER.debug(f"reply: {constants.TOO_MANY_REQUESTS_BODY}")
    self.send_response(reply_msg, properties)
def get_node_info(self, data):
    """Get node metadata as dictionary.

    Required data: cluster_name, node_name
    Optional data and default values: org_name=None, ovdc_name=None

    :param dict data: request payload.
    :return: (dict) info of the node, or raises if not found.
    :raises NodeNotFoundError: if no VM in the cluster matches node_name.
    """
    required = [
        RequestKey.CLUSTER_NAME,
        RequestKey.NODE_NAME
    ]
    utils.ensure_keys_in_dict(required, data, dict_name='data')
    defaults = {
        RequestKey.ORG_NAME: None,
        RequestKey.OVDC_NAME: None
    }
    # Caller-supplied values win over the defaults.
    validated_data = {**defaults, **data}
    cluster_name = validated_data[RequestKey.CLUSTER_NAME]
    node_name = validated_data[RequestKey.NODE_NAME]

    cluster = get_cluster(self.tenant_client, cluster_name,
                          org_name=validated_data[RequestKey.ORG_NAME],
                          ovdc_name=validated_data[RequestKey.OVDC_NAME])

    vapp = VApp(self.tenant_client, href=cluster['vapp_href'])
    vms = vapp.get_all_vms()
    node_info = None
    for vm in vms:
        vm_name = vm.get('name')
        if node_name != vm_name:
            continue
        node_info = {
            'name': vm_name,
            'numberOfCpus': '',
            'memoryMB': '',
            'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
            'ipAddress': ''
        }
        # VmSpecSection may be absent on some VM representations.
        if hasattr(vm, 'VmSpecSection'):
            node_info['numberOfCpus'] = vm.VmSpecSection.NumCpus.text
            node_info['memoryMB'] = vm.VmSpecSection.MemoryResourceMb.Configured.text  # noqa: E501
        # Best-effort: ipAddress stays '' if it cannot be resolved.
        try:
            node_info['ipAddress'] = vapp.get_primary_ip(vm_name)
        except Exception:
            LOGGER.debug(f"Unable to get ip address of node {vm_name}")
        # Node role is encoded in the VM name prefix.
        if vm_name.startswith(NodeType.MASTER):
            node_info['node_type'] = 'master'
        elif vm_name.startswith(NodeType.WORKER):
            node_info['node_type'] = 'worker'
        elif vm_name.startswith(NodeType.NFS):
            node_info['node_type'] = 'nfs'
            # NFS nodes additionally report their exports.
            node_info['exports'] = self._get_nfs_exports(node_info['ipAddress'],  # noqa: E501
                                                         vapp,
                                                         vm_name)
    if node_info is None:
        raise NodeNotFoundError(f"Node '{node_name}' not found in "
                                f"cluster '{cluster_name}'")
    return node_info
def run(self):
    """Thread entry point: dispatch to the handler for self.op.

    Unknown operations are silently ignored, matching the original
    if/elif chain.
    """
    LOGGER.debug(f"Thread started for operation={self.op}")
    handlers = {
        OP_CREATE_CLUSTER: self.create_cluster_thread,
        OP_DELETE_CLUSTER: self.delete_cluster_thread,
        OP_CREATE_NODES: self.create_nodes_thread,
        OP_DELETE_NODES: self.delete_nodes_thread,
    }
    handler = handlers.get(self.op)
    if handler is not None:
        handler()
def run(self):
    """Thread entry point: run the method registered for self.op.

    Operations outside the known set are no-ops, exactly like the
    original if/elif chain.
    """
    LOGGER.debug('thread started op=%s' % self.op)
    op_table = {
        OP_CREATE_CLUSTER: self.create_cluster_thread,
        OP_DELETE_CLUSTER: self.delete_cluster_thread,
        OP_CREATE_NODES: self.create_nodes_thread,
        OP_DELETE_NODES: self.delete_nodes_thread,
    }
    action = op_table.get(self.op)
    if action is not None:
        action()
def _does_cluster_belong_to_vdc(self, cluster_info, vdc_name):
    """Return True iff the cluster's compute profile maps to vdc_name.

    A missing compute profile name counts as "does not belong".
    """
    profile_name = cluster_info.get('compute_profile_name')
    if profile_name is None:
        SERVER_LOGGER.debug("compute-profile-name of"
                            f" {cluster_info.get('name')} is not found")
        return False
    extracted_vdc = self._extract_vdc_name_from_pks_compute_profile_name(
        profile_name)
    return extracted_vdc == vdc_name
def exception_handler_wrapper(*args, **kwargs):
    """Call the wrapped function, mapping malformed-input errors
    (KeyError/TypeError/ValueError) to BadRequestError; log and re-raise
    everything else."""
    try:
        return func(*args, **kwargs)
    except (KeyError, TypeError, ValueError) as error:
        LOGGER.error(error)
        raise cse_exception.BadRequestError(error_message=str(error))
    except Exception as error:
        LOGGER.error(error)
        raise error
def send_too_many_requests_response(self, msg):
    """Publish a 'too many requests' reply for an incoming MQTT message.

    :param msg: MQTT message whose payload carries the request json.
    """
    payload = utils.str_to_json(msg.payload, self.fsencoding)
    req_id = payload["headers"]["requestId"]
    LOGGER.debug(f"Replying with 'too many requests response' for "
                 f"request_id: {req_id} and msg id: {msg.mid}")
    reply = self.form_response_json(
        request_id=req_id,
        status_code=requests.codes.too_many_requests,
        reply_body_str=constants.TOO_MANY_REQUESTS_BODY)
    self.send_response(reply)
def delete_cluster(self, data):
    """Delete the cluster with a given name in PKS environment.

    System administrator can delete the given cluster regardless of
    who is the owner of the cluster. Other users can only delete
    the cluster they own.

    :param dict data: request payload; must contain
        RequestKey.CLUSTER_NAME.
    :return: (dict) qualified cluster name and task status.
    :raises PksServerError: if PKS rejects the delete request.
    """
    cluster_name = data[RequestKey.CLUSTER_NAME]

    # Admins operate on the stored PKS name; regular users only see
    # clusters qualified with their own user id.
    if self.tenant_client.is_sysadmin() \
            or is_org_admin(self.client_session):
        cluster_info = self._get_cluster_info(data)
        qualified_cluster_name = cluster_info['pks_cluster_name']
    else:
        qualified_cluster_name = self._append_user_id(cluster_name)

    result = {}
    cluster_api = ClusterApi(api_client=self.client)

    try:
        LOGGER.debug(
            f"Sending request to PKS: {self.pks_host_uri} to delete "
            f"the cluster with name: {qualified_cluster_name}")
        cluster_api.delete_cluster(cluster_name=qualified_cluster_name)
        LOGGER.debug(
            f"PKS: {self.pks_host_uri} accepted the request to delete"
            f" the cluster: {qualified_cluster_name}")
    except ApiException as err:
        LOGGER.debug(f"Deleting cluster {qualified_cluster_name} failed"
                     f" with error:\n {err}")
        raise PksServerError(err.status, err.body)

    result['name'] = qualified_cluster_name
    # Deletion is asynchronous on the PKS side.
    result['task_status'] = 'in progress'

    # remove cluster network isolation
    LOGGER.debug("Removing network isolation of cluster "
                 f"{qualified_cluster_name}.")
    try:
        cluster_network_isolater = ClusterNetworkIsolater(self.nsxt_client)
        cluster_network_isolater.remove_cluster_isolation(
            qualified_cluster_name)
    except Exception as err:
        # NSX-T operations are idempotent so they should not cause errors
        # if, say, the NSGroup is missing. But for any other exception,
        # simply catch and ignore it — cluster deletion itself already
        # succeeded.
        LOGGER.debug(f"Error {err} occured while deleting cluster "
                     "isolation rules for cluster "
                     f"{qualified_cluster_name}")

    return result
def send_response(self, response_json):
    """Serialize response_json and publish it to the response topic.

    Publishing is serialized through self._publish_lock; the lock is a
    context manager, which is equivalent to acquire/try/finally-release.
    """
    with self._publish_lock:
        pub_ret = self._mqtt_client.publish(
            topic=self.respond_topic,
            payload=json.dumps(response_json),
            qos=constants.QOS_LEVEL,
            retain=False)
    LOGGER.debug(f"publish return (rc, msg_id): {pub_ret}")
def get_node_info(self, cluster_name, node_name, headers):
    """Get the info of a given node in the cluster.

    :param cluster_name: (str): Name of the cluster
    :param node_name: (str): Name of the node
    :param headers: (str): Request headers

    :return: (dict): response with 'status_code' and 'body' (node info).
    :raises CseServerError: if the cluster or the node is not found.
    """
    result = {}
    result['body'] = []
    result['status_code'] = OK
    self._connect_tenant(headers)
    clusters = load_from_metadata(self.client_tenant, name=cluster_name)
    if len(clusters) == 0:
        raise CseServerError('Cluster \'%s\' not found.' % cluster_name)
    vapp = VApp(self.client_tenant, href=clusters[0]['vapp_href'])
    vms = vapp.get_all_vms()
    node_info = None
    for vm in vms:
        # Scan all VMs in the vApp for the requested node name.
        if (node_name == vm.get('name')):
            node_info = {
                'name': vm.get('name'),
                'numberOfCpus': '',
                'memoryMB': '',
                'status': VCLOUD_STATUS_MAP.get(int(vm.get('status'))),
                'ipAddress': ''
            }
            # VmSpecSection may be absent on some VM representations.
            if hasattr(vm, 'VmSpecSection'):
                node_info['numberOfCpus'] = vm.VmSpecSection.NumCpus.text
                node_info['memoryMB'] = \
                    vm.VmSpecSection.MemoryResourceMb.Configured.text
            # Best-effort: ipAddress stays '' if it cannot be resolved.
            try:
                node_info['ipAddress'] = vapp.get_primary_ip(
                    vm.get('name'))
            except Exception:
                LOGGER.debug('cannot get ip address '
                             'for node %s' % vm.get('name'))
            # Node role is encoded in the VM name prefix.
            if vm.get('name').startswith(TYPE_MASTER):
                node_info['node_type'] = 'master'
            elif vm.get('name').startswith(TYPE_NODE):
                node_info['node_type'] = 'node'
            elif vm.get('name').startswith(TYPE_NFS):
                node_info['node_type'] = 'nfsd'
                # NFS nodes additionally report their exports.
                exports = self._get_nfs_exports(node_info['ipAddress'],
                                                vapp, vm)
                node_info['exports'] = exports
    if node_info is None:
        raise CseServerError('Node \'%s\' not found in cluster \'%s\''
                             % (node_name, cluster_name))
    result['body'] = node_info
    return result
def create_compute_profile(self, cp_name, az_name, description, cpi,
                           datacenter_name, cluster_name, ovdc_rp_name):
    """Create a PKS compute profile that maps to a given oVdc in vCD.

    :param str cp_name: Name of the compute profile
    :param str az_name: Name of the PKS availability zone to be defined
    :param str description: Description of the compute profile
    :param str cpi: Unique identifier provided by BOSH
    :param str datacenter_name: Name of the datacenter
    :param str cluster_name: Name of the cluster
    :param str ovdc_rp_name: Name of the oVdc resource pool

    :return: result

    :rtype: dict

    :raises PksServerError: if the PKS API rejects the profile creation.
    """
    result = {
        'body': [],
        'status_code': requests.codes.ok,
    }
    profile_api = ProfileApi(api_client=self.pks_client)

    # Build the AZ cloud-properties payload that ties the profile to the
    # vSphere datacenter/cluster/resource-pool backing the oVdc.
    resource_pool = {'resource_pool': ovdc_rp_name}
    cloud_properties = {
        'datacenters': [{
            'name': datacenter_name,
            'clusters': [{
                cluster_name: resource_pool
            }]
        }]
    }

    az = AZ(name=az_name, cpi=cpi, cloud_properties=cloud_properties)
    cp_params = ComputeProfileParameters(azs=[az])
    cp_request = ComputeProfileRequest(name=cp_name,
                                       description=description,
                                       parameters=cp_params)

    self.pks_wire_logger.debug(f"Sending request to"
                               f" PKS:{self.pks_host_uri} to create the"
                               f" compute profile: {cp_name}"
                               f" for ovdc {ovdc_rp_name}")
    try:
        profile_api.add_compute_profile(body=cp_request)
    except ApiException as err:
        SERVER_LOGGER.debug(f"Creating compute-profile {cp_name} in PKS"
                            f" failed with error:\n {err}")
        raise PksServerError(err.status, err.body)

    self.pks_wire_logger.debug(f"PKS: {self.pks_host_uri} created the"
                               f" compute profile: {cp_name}"
                               f" for ovdc {ovdc_rp_name}")
    return result
def exception_handler_wrapper(*args, **kwargs):
    """Call the wrapped function; HTTPError becomes DefEntityServiceError
    (carrying the server message and status code), anything else is
    logged and re-raised unchanged."""
    try:
        return func(*args, **kwargs)
    except HTTPError as error:
        message = json.loads(error.response.text).get('message')
        LOGGER.error(message)
        raise cse_exception.DefEntityServiceError(
            error_message=message,
            minor_error_code=error.response.status_code)
    except Exception as error:
        LOGGER.error(error)
        raise error
def _does_cluster_belong_to_org(self, cluster_info, org_name):
    """Return True iff the cluster's compute profile resolves to org_name.

    A missing compute profile name counts as "does not belong".
    """
    profile_name = cluster_info.get('compute_profile_name')
    if profile_name is None:
        LOGGER.debug(f"compute-profile-name of {cluster_info.get('name')}"
                     f" is not found")
        return False
    vdc_id = extract_vdc_id_from_pks_compute_profile_name(profile_name)
    return get_org_name_of_ovdc(vdc_id) == org_name
def process_amqp_message(self, properties, body, basic_deliver):
    """Handle one incoming AMQP request message.

    Processes the request via utils.get_response_fields and, when the
    sender supplied a reply-to queue, publishes the response back.

    :param properties: pika BasicProperties of the incoming message.
    :param body: raw request payload.
    :param basic_deliver: pika delivery metadata (not used here).
    """
    # Fix: the first returned field (the parsed request json) was bound
    # to an unused local; discard it explicitly.
    _, reply_body, status_code, req_id = utils.get_response_fields(
        request_msg=body,
        fsencoding=self.fsencoding,
        is_mqtt=False)

    # Fire-and-forget requests carry no reply_to; nothing to send back.
    if properties.reply_to is not None:
        reply_body_str = json.dumps(reply_body)
        reply_msg = self.form_response_json(request_id=req_id,
                                            status_code=status_code,
                                            reply_body_str=reply_body_str)

        self.send_response(reply_msg, properties)
        LOGGER.debug(f"AMQP reply: {reply_msg}")
def exception_handler_wrapper(*args, **kwargs):
    """Invoke the wrapped function, converting CseRequestError into a
    response dict with its own status code, and any other exception into
    an internal-server-error response dict."""
    try:
        return func(*args, **kwargs)
    except CseRequestError as e:
        payload = {
            'status_code': e.status_code,
            'body': {'message': str(e)},
        }
        LOGGER.error(traceback.format_exc())
        return payload
    except Exception as err:
        payload = {
            'status_code': requests.codes.internal_server_error,
            'body': error_to_json(err),
        }
        LOGGER.error(traceback.format_exc())
        return payload
def _does_cluster_belong_to_org(self, cluster_info, org_name):
    """Return True iff the cluster's compute profile resolves to org_name.

    A missing compute profile name counts as "does not belong".
    """
    profile_name = cluster_info.get('compute_profile_name')
    if profile_name is None:
        SERVER_LOGGER.debug("compute-profile-name of"
                            f" {cluster_info.get('name')} is not found")
        return False
    ovdc_id = self._extract_vdc_id_from_pks_compute_profile_name(
        profile_name)
    cluster_org = get_org_name_from_ovdc_id(
        self.context.sysadmin_client, ovdc_id)
    return cluster_org == org_name
def delete_cluster_thread(self):
    """Delete the cluster's backing vApp and record the task outcome.

    Any failure (including a failure while reporting success) is caught
    and recorded as a task error.
    """
    LOGGER.debug('about to delete cluster with name: %s',
                 self.cluster_name)
    try:
        cluster_vdc = VDC(self.client_tenant,
                          href=self.cluster['vdc_href'])
        delete_task = cluster_vdc.delete_vapp(self.cluster['name'],
                                              force=True)
        self.client_tenant.get_task_monitor().wait_for_status(delete_task)
        self.update_task(
            TaskStatus.SUCCESS,
            message='Deleted cluster %s(%s)' % (self.cluster_name,
                                                self.cluster_id))
    except Exception as e:
        LOGGER.error(traceback.format_exc())
        self.update_task(TaskStatus.ERROR, error_message=str(e))
def resize_cluster(self, **kwargs):
    """Resize the cluster of a given name to given number of worker nodes.

    System administrator can resize the given cluster regardless of
    who is the owner of the cluster. Other users can only resize
    the cluster they own.

    :return: response status

    :rtype: dict

    :raises PksServerError: if the PKS update call fails.
    """
    data = kwargs[KwargKey.DATA]
    cluster_name = data[RequestKey.CLUSTER_NAME]
    num_workers = data[RequestKey.NUM_WORKERS]

    # Default to the caller's user-qualified name; admins may operate on
    # clusters owned by others, so the stored PKS name is used instead.
    qualified_cluster_name = self._append_user_id(cluster_name)
    if (self.context.client.is_sysadmin()
            or self.context.user.has_org_admin_rights):
        cluster_info = self._get_cluster_info(data)
        qualified_cluster_name = cluster_info['pks_cluster_name']

    self._check_cluster_isolation(cluster_name, qualified_cluster_name)

    result = {}
    cluster_api = ClusterApi(api_client=self.pks_client)
    self.pks_wire_logger.debug(f"Sending request to"
                               f" PKS:{self.pks_host_uri} to resize"
                               f" the cluster with name:"
                               f"{qualified_cluster_name} to"
                               f" {num_workers} worker nodes")
    resize_params = \
        UpdateClusterParameters(kubernetes_worker_instances=num_workers)
    try:
        cluster_api.update_cluster(qualified_cluster_name,
                                   body=resize_params)
    except ApiException as err:
        SERVER_LOGGER.debug(f"Resizing cluster {qualified_cluster_name}"
                            f" failed with error:\n {err}")
        raise PksServerError(err.status, err.body)
    self.pks_wire_logger.debug(f"PKS: {self.pks_host_uri} accepted the"
                               f" request to resize the cluster: "
                               f" {qualified_cluster_name}")

    result['name'] = qualified_cluster_name
    # PKS performs the resize asynchronously.
    result['task_status'] = 'in progress'
    # Strip the user-id suffix before returning to the caller; non-admins
    # additionally have sensitive PKS fields filtered out.
    self._restore_original_name(result)
    if not self.context.client.is_sysadmin():
        self._filter_sensitive_pks_properties(result)

    return result
def delete_nodes(self, data):
    """Start the delete nodes operation.

    Validates data for the 'delete nodes' operation. Deleting nodes is an
    asynchronous task, so the returned `result['task_href']` can be
    polled to get updates on task progress.

    Required data: cluster_name, node_names_list
    Optional data and default values: org_name=None, ovdc_name=None

    :raises CseServerError: if a master node is among the nodes to delete.
    """
    required = [
        RequestKey.CLUSTER_NAME,
        RequestKey.NODE_NAMES_LIST
    ]
    utils.ensure_keys_in_dict(required, data, dict_name='data')
    defaults = {
        RequestKey.ORG_NAME: None,
        RequestKey.OVDC_NAME: None
    }
    # Caller-supplied values win over the defaults.
    validated_data = {**defaults, **data}
    cluster_name = validated_data[RequestKey.CLUSTER_NAME]
    node_names_list = validated_data[RequestKey.NODE_NAMES_LIST]

    # check that there are nodes to delete
    if len(node_names_list) == 0:
        LOGGER.debug("No nodes specified to delete")
        return {'body': {}}
    # check that master node is not in specified nodes
    for node in node_names_list:
        if node.startswith(NodeType.MASTER):
            raise CseServerError(f"Can't delete a master node: '{node}'.")

    cluster = get_cluster(self.tenant_client, cluster_name,
                          org_name=validated_data[RequestKey.ORG_NAME],
                          ovdc_name=validated_data[RequestKey.OVDC_NAME])
    cluster_id = cluster['cluster_id']
    # must _update_task here or else self.task_resource is None
    # do not logout of sys admin, or else in pyvcloud's session.request()
    # call, session becomes None
    self._update_task(
        TaskStatus.RUNNING,
        message=f"Deleting {len(node_names_list)} node(s)"
                f" from cluster {cluster_name}({cluster_id})")
    # Actual deletion happens on a worker thread; caller polls task_href.
    self._delete_nodes_async(
        cluster_name=cluster_name,
        cluster_vapp_href=cluster['vapp_href'],
        node_names_list=validated_data[RequestKey.NODE_NAMES_LIST])

    return {
        'cluster_name': cluster_name,
        'task_href': self.task_resource.get('href')
    }
def exception_handler_wrapper(*args, **kwargs):
    """Call the wrapped function; HTTPError becomes DefSchemaServiceError
    (with the server-provided message), anything else is logged and
    re-raised unchanged."""
    try:
        return func(*args, **kwargs)
    except HTTPError as error:
        message = json.loads(error.response.text).get('message')
        LOGGER.error(message)
        raise cse_exceptions.DefSchemaServiceError(
            error_message=message,
            minor_error_code=MinorErrorCode.DEFAULT_ERROR_CODE)
    except Exception as error:
        LOGGER.error(error)
        raise error
def _delete_cluster_async(self, *args, cluster_name, cluster_vdc_href):
    """Delete a cluster on a worker thread, tracking progress via task.

    Moves the backing task through RUNNING -> SUCCESS, or to ERROR on any
    failure, and always releases the sys-admin client when done.

    :param str cluster_name: name of the cluster to delete.
    :param str cluster_vdc_href: href of the vdc backing the cluster.
    """
    try:
        self._update_task(TaskStatus.RUNNING,
                          message=f"Deleting cluster {cluster_name}")
        self._delete_cluster(cluster_name=cluster_name,
                             cluster_vdc_href=cluster_vdc_href)
        self._update_task(TaskStatus.SUCCESS,
                          message=f"Deleted cluster {cluster_name}")
    except Exception as e:
        # exc_info=True attaches the full stack trace to the log record.
        LOGGER.error(f"Unexpected error while deleting cluster: {e}",
                     exc_info=True)
        self._update_task(TaskStatus.ERROR, error_message=str(e))
    finally:
        self.logout_sys_admin_client()
def connect(self):
    """Open an asynchronous pika connection to the AMQP broker.

    :return: a pika.SelectConnection wired to on_connection_open.
    """
    LOGGER.info('Connecting to %s:%s' % (self.host, self.port))
    creds = pika.PlainCredentials(self.username, self.password)
    params = pika.ConnectionParameters(self.host,
                                       self.port,
                                       self.vhost,
                                       creds,
                                       ssl=self.ssl,
                                       connection_attempts=3,
                                       retry_delay=2,
                                       socket_timeout=5)
    return pika.SelectConnection(params,
                                 self.on_connection_open,
                                 stop_ioloop_on_close=False)
def delete_nodes_thread(self):
    """Worker-thread entry point: delete the requested nodes of a cluster.

    Reads the node name list from self.req_spec; attempts in-guest node
    removal via the cluster scripts, then undeploys and deletes the VMs
    from the vApp. The backing task is updated at each stage and the sys
    admin connection is always released at the end.
    """
    LOGGER.debug(f"About to delete nodes from cluster with name: "
                 f"{self.cluster_name}")
    try:
        vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
        template = self._get_template()
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Deleting "
                    f"{len(self.req_spec.get(RequestKey.NODE_NAMES_LIST))}"
                    f" node(s) from "
                    f"{self.cluster_name}({self.cluster_id})")
        try:
            server_config = get_server_runtime_config()
            delete_nodes_from_cluster(
                server_config,
                vapp,
                template,
                self.req_spec.get(RequestKey.NODE_NAMES_LIST),
                self.req_spec.get(RequestKey.FORCE_DELETE))
        except Exception:
            # Best-effort: even if in-guest removal fails, continue to
            # undeploy and delete the VMs below.
            LOGGER.error(f"Couldn't delete node "
                         f"{self.req_spec.get(RequestKey.NODE_NAMES_LIST)}"
                         f" from cluster:{self.cluster_name}")

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Undeploying "
                    f"{len(self.req_spec.get(RequestKey.NODE_NAMES_LIST))}"
                    f" node(s) for {self.cluster_name}({self.cluster_id})")
        for vm_name in self.req_spec.get(RequestKey.NODE_NAMES_LIST):
            vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name))
            try:
                task = vm.undeploy()
                self.tenant_client.get_task_monitor().wait_for_status(task)
            except Exception:
                # The VM may already be powered off; deletion below still
                # proceeds.
                LOGGER.warning(f"Couldn't undeploy VM {vm_name}")

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Deleting "
                    f"{len(self.req_spec.get(RequestKey.NODE_NAMES_LIST))}"
                    f" VM(s) for {self.cluster_name}({self.cluster_id})")
        task = vapp.delete_vms(self.req_spec.get(RequestKey.NODE_NAMES_LIST))  # noqa: E501
        self.tenant_client.get_task_monitor().wait_for_status(task)

        # NOTE(review): message reads "node(s) to cluster"; probably meant
        # "from cluster" — runtime string deliberately left untouched.
        self._update_task(
            TaskStatus.SUCCESS,
            message=f"Deleted "
                    f"{len(self.req_spec.get(RequestKey.NODE_NAMES_LIST))}"
                    f" node(s) to cluster "
                    f"{self.cluster_name}({self.cluster_id})")
    except Exception as e:
        LOGGER.error(traceback.format_exc())
        error_obj = error_to_json(e)
        stack_trace = \
            ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])
        self._update_task(
            TaskStatus.ERROR,
            error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
            stack_trace=stack_trace)
    finally:
        self._disconnect_sys_admin()
def delete_cluster_thread(self):
    """Delete the cluster's backing vApp and record the task outcome.

    The sys-admin connection is always released, whether deletion
    succeeds or fails.
    """
    LOGGER.debug(f"About to delete cluster with name: {self.cluster_name}")
    try:
        cluster_vdc = VDC(self.tenant_client,
                          href=self.cluster['vdc_href'])
        delete_task = cluster_vdc.delete_vapp(self.cluster['name'],
                                              force=True)
        self.tenant_client.get_task_monitor().wait_for_status(delete_task)
        self.update_task(TaskStatus.SUCCESS,
                         message=f"Deleted cluster {self.cluster_name}"
                                 f"({self.cluster_id})")
    except Exception as e:
        LOGGER.error(traceback.format_exc())
        self.update_task(TaskStatus.ERROR, error_message=str(e))
    finally:
        self._disconnect_sys_admin()
def _isolate_cluster(self, cluster_name, qualified_cluster_name, cluster_id): if not cluster_id: raise ValueError( f"Invalid cluster_id for cluster : '{cluster_name}'") LOGGER.debug(f"Isolating network of cluster {cluster_name}.") try: cluster_network_isolater = ClusterNetworkIsolater(self.nsxt_client) cluster_network_isolater.isolate_cluster(qualified_cluster_name, cluster_id) except Exception as err: raise ClusterNetworkIsolationError( f"Cluster : '{cluster_name}' is in an unusable state. Failed " "to isolate cluster network") from err