def _wait_until_ready_to_exec(vs, vm, password, tries=30):
    """Poll a guest VM until it is able to execute scripts.

    Runs a trivial probe script (`uname -a`) as root up to `tries` times,
    sleeping 2 seconds between attempts. Returns as soon as the probe
    exits with status 0; raises CseServerError if it never does.

    :param vs: vSphere helper providing execute_script_in_guest.
    :param vm: target guest VM.
    :param str password: root password for the guest.
    :param int tries: maximum number of probe attempts.

    :raises CseServerError: if the VM never becomes ready.
    """
    probe_script = "#!/usr/bin/env bash\n" \
                   "uname -a\n"
    for _ in range(tries):
        exit_info = vs.execute_script_in_guest(
            vm, 'root', password, probe_script,
            target_file=None,
            wait_for_completion=True, wait_time=5,
            get_output=True, delete_script=True,
            callback=_wait_for_guest_execution_callback)
        if exit_info[0] == 0:
            # Probe succeeded — guest tooling is up.
            return
        LOGGER.info(f"Script returned {exit_info[0]}; VM is not "
                    f"ready to execute scripts, yet")
        time.sleep(2)
    raise CseServerError('VM is not ready to execute scripts')
def wait_until_ready_to_exec(vs, vm, password, tries=30):
    """Poll a guest VM until it is able to execute scripts.

    Each attempt runs a trivial `uname -a` probe as root. Any failure —
    the probe returning non-zero OR execute_script_in_guest itself
    raising — is treated the same way: log, wait 2 seconds, retry.

    :raises CseServerError: if the VM never becomes ready within `tries`.
    """
    probe = \
        """#!/usr/bin/env bash
uname -a
"""  # NOQA
    for _ in range(tries):
        try:
            status = vs.execute_script_in_guest(
                vm, 'root', password, probe, target_file=None,
                wait_for_completion=True, wait_time=5, get_output=True,
                delete_script=True,
                callback=wait_for_guest_execution_callback)
            if status[0] == 0:
                return
            # Non-zero exit: funnel into the same retry path as a raised
            # error from the guest-execution call.
            raise Exception(f"script returned {status[0]}")
        except Exception:
            LOGGER.info("VM is not ready to execute scripts, yet")
            time.sleep(2)
    raise CseServerError('VM is not ready to execute scripts')
def consumer_thread(c):
    """Thread target that runs a message consumer's blocking loop.

    If the loop raises for any reason, the failure is echoed, the
    traceback is logged, and the consumer is stopped so its connection
    is released.

    :param c: consumer object exposing run() and stop().
    """
    try:
        LOGGER.info(f"About to start consumer_thread {c}.")
        c.run()
    except Exception:
        # Announce on the console, record the full traceback, then make
        # sure the consumer shuts down cleanly.
        click.echo("About to stop consumer_thread.")
        LOGGER.error(traceback.format_exc())
        c.stop()
def stop(self):
    """Shut this consumer down cleanly.

    Marks the consumer as closing, drains the worker thread pool,
    cancels consumption, and halts the connection's ioloop if a
    connection exists.
    """
    LOGGER.info("Stopping")
    self._closing = True
    # Block until in-flight jobs in the thread pool have finished.
    self._ctpe.shutdown(wait=True)
    self.stop_consuming()
    conn = self._connection
    if conn:
        conn.ioloop.stop()
    LOGGER.info("Stopped")
def consumer_thread(c):
    """Thread target that runs a message consumer's blocking loop.

    On any exception from the loop: echo a notice, log the traceback,
    and stop the consumer so its resources are released.

    :param c: consumer object exposing run() and stop().
    """
    try:
        LOGGER.info('about to start consumer_thread %s', c)
        c.run()
    except Exception:
        # Best-effort shutdown path: echo, record traceback, stop.
        click.echo('about to stop consumer_thread')
        LOGGER.error(traceback.format_exc())
        c.stop()
def cluster_rollback(self):
    """Roll back a failed cluster creation by deleting its vApp.

    Looks the cluster up by name via metadata; if exactly one match is
    not found, the rollback is silently skipped (logged at debug).
    """
    LOGGER.info('About to rollback cluster with name: %s' %
                self.cluster_name)
    matches = load_from_metadata(self.client_tenant, name=self.cluster_name)
    if len(matches) != 1:
        # Nothing (or something ambiguous) to delete — bail out.
        LOGGER.debug('Cluster %s not found.' % self.cluster_name)
        return
    self.cluster = matches[0]
    owning_vdc = VDC(self.client_tenant, href=self.cluster['vdc_href'])
    owning_vdc.delete_vapp(self.cluster['name'], force=True)
    LOGGER.info('Successfully deleted cluster: %s' % self.cluster_name)
def cluster_rollback(self):
    """Roll back a failed cluster creation by deleting its vApp.

    Connects in the tenant context, looks the cluster up by name via
    metadata, and deletes its vApp. If exactly one match is not found,
    the rollback is skipped (logged at debug).
    """
    # BUG FIX: the continuation line of this log message was a plain
    # string literal (no `f` prefix), so the literal text
    # "{self.cluster_name}" was logged instead of the cluster name.
    LOGGER.info(f"About to rollback cluster with name: "
                f"{self.cluster_name}")
    self._connect_tenant()
    clusters = load_from_metadata(
        self.tenant_client, name=self.cluster_name)
    if len(clusters) != 1:
        LOGGER.debug(f"Cluster {self.cluster_name} not found.")
        return
    self.cluster = clusters[0]
    vdc = VDC(self.tenant_client, href=self.cluster['vdc_href'])
    vdc.delete_vapp(self.cluster['name'], force=True)
    LOGGER.info(f"Successfully deleted cluster: {self.cluster_name}")
def connect(self):
    """Open an asynchronous AMQP connection to RabbitMQ.

    :return: a pika.SelectConnection whose open callback is
        self.on_connection_open.
    """
    LOGGER.info('Connecting to %s:%s' % (self.host, self.port))
    creds = pika.PlainCredentials(self.username, self.password)
    conn_params = pika.ConnectionParameters(
        self.host, self.port, self.vhost, creds,
        ssl=self.ssl,
        connection_attempts=3,
        retry_delay=2,
        socket_timeout=5)
    return pika.SelectConnection(
        conn_params, self.on_connection_open,
        # Keep the ioloop running even when the connection closes; the
        # close handler decides when to stop it.
        stop_ioloop_on_close=False)
def node_rollback(self, node_list):
    """Roll back a failed node creation by deleting the faulty nodes.

    First attempts a graceful delete through the cluster; if that
    fails, force-undeploys each VM individually and removes them from
    the vApp.

    :param list node_list: faulty nodes to be deleted
    """
    LOGGER.info('About to rollback nodes from cluster with name: %s' %
                self.cluster_name)
    LOGGER.info('Node list to be deleted:%s' % node_list)
    cluster_vapp = VApp(self.client_tenant, href=self.cluster['vapp_href'])
    tmpl = self.get_template()
    try:
        delete_nodes_from_cluster(self.config, cluster_vapp, tmpl,
                                  node_list, force=True)
    except Exception:
        # Graceful path failed; fall through to the forceful cleanup.
        LOGGER.warning("Couldn't delete node %s from cluster:%s" %
                       (node_list, self.cluster_name))
    for vm_name in node_list:
        node_vm = VM(self.client_tenant, resource=cluster_vapp.get_vm(vm_name))
        try:
            node_vm.undeploy()
        except Exception:
            # Best effort — delete_vms below may still succeed.
            LOGGER.warning("Couldn't undeploy VM %s" % vm_name)
    cluster_vapp.delete_vms(node_list)
    LOGGER.info('Successfully deleted nodes: %s' % node_list)
def node_rollback(self, node_list):
    """Roll back a failed node creation by deleting the faulty nodes.

    First attempts a graceful delete through the cluster; if that
    fails, force-undeploys each VM individually and removes them from
    the vApp.

    :param list node_list: faulty nodes to be deleted
    """
    # BUG FIX: the second literal lacked an `f` prefix, so the text
    # "{self.cluster_name}" was logged verbatim.
    LOGGER.info(f"About to rollback nodes from cluster with name: "
                f"{self.cluster_name}")
    LOGGER.info(f"Node list to be deleted:{node_list}")
    vapp = VApp(self.tenant_client, href=self.cluster['vapp_href'])
    template = self._get_template()
    try:
        server_config = get_server_runtime_config()
        delete_nodes_from_cluster(server_config, vapp, template,
                                  node_list, force=True)
    except Exception:
        # BUG FIX: both literals lacked the `f` prefix, so the
        # placeholders were logged verbatim instead of the values.
        LOGGER.warning(f"Couldn't delete node {node_list} from cluster:"
                       f"{self.cluster_name}")
    for vm_name in node_list:
        vm = VM(self.tenant_client, resource=vapp.get_vm(vm_name))
        try:
            vm.undeploy()
        except Exception:
            # Best effort — delete_vms below may still succeed.
            LOGGER.warning(f"Couldn't undeploy VM {vm_name}")
    vapp.delete_vms(node_list)
    LOGGER.info(f"Successfully deleted nodes: {node_list}")
def on_subscribe(mqtt_client, userdata, msg_id, given_qos):
    """MQTT callback: record the QoS granted for a confirmed subscription."""
    LOGGER.info(f'MQTT client subscribed with given_qos: {given_qos}')
def close_connection(self):
    """Close the underlying AMQP connection."""
    LOGGER.info('Closing connection')
    self._connection.close()
def stop(self):
    """Stop the consumer and wait for the cancel handshake to finish.

    NOTE(review): ioloop.start() after stop_consuming() looks like the
    standard pika pattern of re-entering the ioloop so the Basic.Cancel
    / close callbacks can run to completion — confirm against the
    consumer's on_cancelok handler.
    """
    LOGGER.info('Stopping')
    self._closing = True
    self.stop_consuming()
    self._connection.ioloop.start()
    LOGGER.info('Stopped')
def stop_consuming(self):
    """Ask RabbitMQ to cancel this consumer, if a channel is open."""
    if not self._channel:
        return
    LOGGER.info('Sending a Basic.Cancel RPC command to RabbitMQ')
    self._channel.basic_cancel(self.on_cancelok, self._consumer_tag)
def run(self, msg_update_callback=None):
    """Start the CSE server: validate config, load templates, spawn
    AMQP consumers, then block until shutdown.

    :param msg_update_callback: optional object with general/error/
        general_no_color methods used to surface progress to the console.

    Exits the process (sys.exit(1)) if no K8 templates are found or the
    configured default template is missing.
    """
    configure_server_logger()

    self.config = get_validated_config(
        self.config_file, msg_update_callback=msg_update_callback)

    populate_vsphere_list(self.config['vcs'])

    # Read K8 catalog definition from catalog item metadata and append
    # to server config
    client = None
    try:
        client = Client(self.config['vcd']['host'],
                        api_version=self.config['vcd']['api_version'],
                        verify_ssl_certs=self.config['vcd']['verify'],
                        log_file=SERVER_DEBUG_WIRELOG_FILEPATH,
                        log_requests=True,
                        log_headers=True,
                        log_bodies=True)
        credentials = BasicLoginCredentials(self.config['vcd']['username'],
                                            SYSTEM_ORG_NAME,
                                            self.config['vcd']['password'])
        client.set_credentials(credentials)

        org_name = self.config['broker']['org']
        catalog_name = self.config['broker']['catalog']
        k8_templates = get_all_k8s_local_template_definition(
            client=client, catalog_name=catalog_name, org_name=org_name)
        if not k8_templates:
            msg = "No valid K8 templates were found in catalog " \
                  f"'{catalog_name}'. Unable to start CSE server."
            if msg_update_callback:
                msg_update_callback.error(msg)
            LOGGER.error(msg)
            sys.exit(1)

        # Check that default K8 template exists in vCD at the correct
        # revision
        default_template_name = \
            self.config['broker']['default_template_name']
        default_template_revision = \
            str(self.config['broker']['default_template_revision'])
        found_default_template = False
        for template in k8_templates:
            if str(template['revision']) == default_template_revision \
                    and template['name'] == default_template_name:
                found_default_template = True

            # Every discovered template is announced, not just the default.
            msg = f"Found K8 template '{template['name']}' at revision " \
                  f"{template['revision']} in catalog '{catalog_name}'"
            if msg_update_callback:
                msg_update_callback.general(msg)
            LOGGER.info(msg)

        if not found_default_template:
            msg = f"Default template {default_template_name} with " \
                  f"revision {default_template_revision} not found." \
                  " Unable to start CSE server."
            if msg_update_callback:
                msg_update_callback.error(msg)
            LOGGER.error(msg)
            sys.exit(1)

        self.config['broker']['templates'] = k8_templates
    finally:
        if client:
            client.logout()

    # TODO Rule framework, update config with rules

    if self.should_check_config:
        check_cse_installation(self.config,
                               msg_update_callback=msg_update_callback)

    if self.config.get('pks_config'):
        pks_config = self.config.get('pks_config')
        self.pks_cache = PksCache(
            pks_servers=pks_config.get('pks_api_servers', []),
            pks_accounts=pks_config.get('pks_accounts', []),
            pvdcs=pks_config.get('pvdcs', []),
            orgs=pks_config.get('orgs', []),
            nsxt_servers=pks_config.get('nsxt_servers', []))

    # Spawn one daemon consumer thread per configured listener.
    amqp = self.config['amqp']
    num_consumers = self.config['service']['listeners']
    for n in range(num_consumers):
        try:
            c = MessageConsumer(
                amqp['host'], amqp['port'], amqp['ssl'], amqp['vhost'],
                amqp['username'], amqp['password'], amqp['exchange'],
                amqp['routing_key'])
            name = 'MessageConsumer-%s' % n
            t = Thread(name=name, target=consumer_thread, args=(c, ))
            t.daemon = True
            t.start()
            msg = f"Started thread '{name} ({t.ident})'"
            if msg_update_callback:
                msg_update_callback.general(msg)
            LOGGER.info(msg)
            self.threads.append(t)
            self.consumers.append(c)
            time.sleep(0.25)
        except KeyboardInterrupt:
            break
        except Exception:
            # A failed consumer is logged but does not stop the others.
            LOGGER.error(traceback.format_exc())

    LOGGER.info(f"Number of threads started: {len(self.threads)}")

    self._state = ServerState.RUNNING

    message = f"Container Service Extension for vCloud Director" \
              f"\nServer running using config file: {self.config_file}" \
              f"\nLog files: {SERVER_INFO_LOG_FILEPATH}, " \
              f"{SERVER_DEBUG_LOG_FILEPATH}" \
              f"\nwaiting for requests (ctrl+c to close)"
    signal.signal(signal.SIGINT, signal_handler)
    if msg_update_callback:
        msg_update_callback.general_no_color(message)
    LOGGER.info(message)

    # Main wait loop: exit when a stop was requested and all in-flight
    # requests have drained, or on ctrl+c.
    while True:
        try:
            time.sleep(1)
            if self._state == ServerState.STOPPING and \
                    self.active_requests_count() == 0:
                break
        except KeyboardInterrupt:
            break
        except Exception:
            if msg_update_callback:
                msg_update_callback.general_no_color(
                    traceback.format_exc())
            LOGGER.error(traceback.format_exc())
            sys.exit(1)

    LOGGER.info("Stop detected")
    LOGGER.info("Closing connections...")
    for c in self.consumers:
        try:
            c.stop()
        except Exception:
            # Best-effort shutdown; keep closing the remaining consumers.
            pass

    self._state = ServerState.STOPPED
    LOGGER.info("Done")
def run(self, msg_update_callback=None):
    """Start the CSE server: validate config, spawn AMQP consumers,
    then block until shutdown.

    :param msg_update_callback: optional object with general_no_color
        used to surface progress/errors to the console.
    """
    self.config = get_validated_config(
        self.config_file, msg_update_callback=msg_update_callback)

    if self.should_check_config:
        check_cse_installation(self.config,
                               msg_update_callback=msg_update_callback)

    configure_server_logger()

    message = f"Container Service Extension for vCloudDirector" \
              f"\nServer running using config file: {self.config_file}" \
              f"\nLog files: {SERVER_INFO_LOG_FILEPATH}, " \
              f"{SERVER_DEBUG_LOG_FILEPATH}" \
              f"\nwaiting for requests (ctrl+c to close)"
    signal.signal(signal.SIGINT, signal_handler)
    if msg_update_callback:
        msg_update_callback.general_no_color(message)
    LOGGER.info(message)

    if self.config.get('pks_config'):
        pks_config = self.config.get('pks_config')
        self.pks_cache = PksCache(
            pks_servers=pks_config.get('pks_api_servers', []),
            pks_accounts=pks_config.get('pks_accounts', []),
            pvdcs=pks_config.get('pvdcs', []),
            orgs=pks_config.get('orgs', []),
            nsxt_servers=pks_config.get('nsxt_servers', []))

    # Spawn one daemon consumer thread per configured listener.
    amqp = self.config['amqp']
    num_consumers = self.config['service']['listeners']
    for n in range(num_consumers):
        try:
            c = MessageConsumer(
                amqp['host'], amqp['port'], amqp['ssl'], amqp['vhost'],
                amqp['username'], amqp['password'], amqp['exchange'],
                amqp['routing_key'])
            name = 'MessageConsumer-%s' % n
            t = Thread(name=name, target=consumer_thread, args=(c, ))
            t.daemon = True
            t.start()
            # BUG FIX: literal was missing the `f` prefix, so the text
            # "Started thread {t.ident}" was logged verbatim.
            LOGGER.info(f"Started thread {t.ident}")
            self.threads.append(t)
            self.consumers.append(c)
            time.sleep(0.25)
        except KeyboardInterrupt:
            break
        except Exception:
            # Consistency fix: log the traceback like the other run()
            # variants instead of printing to stdout.
            LOGGER.error(traceback.format_exc())

    LOGGER.info(f"Number of threads started: {len(self.threads)}")

    self._state = ServerState.RUNNING

    # Main wait loop: exit when a stop was requested and all in-flight
    # requests have drained, or on ctrl+c.
    while True:
        try:
            time.sleep(1)
            if self._state == ServerState.STOPPING and \
                    self.active_requests_count() == 0:
                break
        except KeyboardInterrupt:
            break
        except Exception:
            if msg_update_callback:
                msg_update_callback.general_no_color(
                    traceback.format_exc())
            sys.exit(1)

    LOGGER.info("Stop detected")
    LOGGER.info("Closing connections...")
    for c in self.consumers:
        try:
            c.stop()
        except Exception:
            # Best-effort shutdown; keep closing the remaining consumers.
            pass

    self._state = ServerState.STOPPED
    LOGGER.info("Done")
def _load_template_definition_from_catalog(self, msg_update_callback=None):
    """Read k8s template definitions from catalog item metadata.

    Logs in as the system user, fetches all template definitions from
    the configured catalog, verifies that the configured default
    template exists at the configured revision, and stores the list in
    self.config['broker']['templates'].

    :param msg_update_callback: optional object with general /
        general_no_color / error methods used for console output.

    Exits the process (sys.exit(1)) if no templates are found or the
    default template is missing.
    """
    msg = "Loading k8s template definition from catalog"
    LOGGER.info(msg)
    if msg_update_callback:
        msg_update_callback.general_no_color(msg)

    client = None
    try:
        # Wire logging only when explicitly enabled in config.
        log_filename = None
        log_wire = str_to_bool(self.config['service'].get('log_wire'))
        if log_wire:
            log_filename = SERVER_DEBUG_WIRELOG_FILEPATH

        client = Client(self.config['vcd']['host'],
                        api_version=self.config['vcd']['api_version'],
                        verify_ssl_certs=self.config['vcd']['verify'],
                        log_file=log_filename,
                        log_requests=log_wire,
                        log_headers=log_wire,
                        log_bodies=log_wire)
        credentials = BasicLoginCredentials(self.config['vcd']['username'],
                                            SYSTEM_ORG_NAME,
                                            self.config['vcd']['password'])
        client.set_credentials(credentials)

        org_name = self.config['broker']['org']
        catalog_name = self.config['broker']['catalog']
        k8_templates = get_all_k8s_local_template_definition(
            client=client, catalog_name=catalog_name, org_name=org_name)

        if not k8_templates:
            msg = "No valid K8 templates were found in catalog " \
                  f"'{catalog_name}'. Unable to start CSE server."
            if msg_update_callback:
                msg_update_callback.error(msg)
            LOGGER.error(msg)
            sys.exit(1)

        # Check that default k8s template exists in vCD at the correct
        # revision
        default_template_name = \
            self.config['broker']['default_template_name']
        default_template_revision = \
            str(self.config['broker']['default_template_revision'])
        found_default_template = False
        for template in k8_templates:
            if str(template[LocalTemplateKey.REVISION]
                   ) == default_template_revision and template[
                    LocalTemplateKey.NAME] == default_template_name:  # noqa: E501
                found_default_template = True

            # Every discovered template is announced, not just the default.
            msg = f"Found K8 template '{template['name']}' at revision " \
                  f"{template['revision']} in catalog '{catalog_name}'"
            if msg_update_callback:
                msg_update_callback.general(msg)
            LOGGER.info(msg)

        if not found_default_template:
            msg = f"Default template {default_template_name} with " \
                  f"revision {default_template_revision} not found." \
                  " Unable to start CSE server."
            if msg_update_callback:
                msg_update_callback.error(msg)
            LOGGER.error(msg)
            sys.exit(1)

        self.config['broker']['templates'] = k8_templates
    finally:
        # Always release the system session, even on sys.exit paths.
        if client:
            client.logout()
def _process_template_compute_policy_compliance(self,
                                                msg_update_callback=None):
    """Stamp or strip compute policies on template vApp VMs.

    For each template in self.config['broker']['templates']: if the
    template names a compute policy, ensure the policy exists (creating
    it if missing) and assign it to the template's VMs; an empty policy
    name means all compute policies are removed from the template's VMs
    instead. Skipped entirely (with a notice) if vCD reports compute
    policies as unsupported.

    :param msg_update_callback: optional object with general /
        general_no_color / info methods used for console output.
    """
    msg = "Processing compute policy for k8s templates."
    LOGGER.info(msg)
    if msg_update_callback:
        msg_update_callback.general_no_color(msg)

    # Wire logging only when explicitly enabled in config.
    log_filename = None
    log_wire = str_to_bool(self.config['service'].get('log_wire'))
    if log_wire:
        log_filename = SERVER_DEBUG_WIRELOG_FILEPATH

    org_name = self.config['broker']['org']
    catalog_name = self.config['broker']['catalog']
    api_version = self.config['vcd']['api_version']
    client = None
    try:
        if float(api_version) >= float(ApiVersion.VERSION_32.value):  # noqa: E501
            # TODO this api version 32 client should be removed once
            # vcd can handle cp removal/replacement on version 33
            client = Client(self.config['vcd']['host'],
                            api_version=ApiVersion.VERSION_32.value,
                            verify_ssl_certs=self.config['vcd']['verify'],
                            log_file=log_filename,
                            log_requests=log_wire,
                            log_headers=log_wire,
                            log_bodies=log_wire)
        else:
            client = Client(self.config['vcd']['host'],
                            api_version=api_version,
                            verify_ssl_certs=self.config['vcd']['verify'],
                            log_file=log_filename,
                            log_requests=log_wire,
                            log_headers=log_wire,
                            log_bodies=log_wire)
        credentials = BasicLoginCredentials(self.config['vcd']['username'],
                                            SYSTEM_ORG_NAME,
                                            self.config['vcd']['password'])
        client.set_credentials(credentials)
        cpm = ComputePolicyManager(client)

        try:
            for template in self.config['broker']['templates']:
                policy_name = template[LocalTemplateKey.COMPUTE_POLICY]
                catalog_item_name = template[
                    LocalTemplateKey.CATALOG_ITEM_NAME]  # noqa: E501
                # if policy name is not empty, stamp it on the template
                if policy_name:
                    policy = cpm.get_policy(policy_name=policy_name)
                    # create the policy if not present in system
                    if not policy:
                        msg = "Creating missing compute policy " \
                              f"'{policy_name}'."
                        if msg_update_callback:
                            msg_update_callback.info(msg)
                        LOGGER.debug(msg)
                        policy = cpm.add_policy(policy_name=policy_name)

                    msg = f"Assigning compute policy '{policy_name}' to " \
                          f"template '{catalog_item_name}'."
                    if msg_update_callback:
                        msg_update_callback.general(msg)
                    LOGGER.debug(msg)
                    cpm.assign_compute_policy_to_vapp_template_vms(
                        compute_policy_href=policy['href'],
                        org_name=org_name,
                        catalog_name=catalog_name,
                        catalog_item_name=catalog_item_name)
                else:
                    # empty policy name means we should remove policy from
                    # template
                    msg = f"Removing compute policy from template " \
                          f"'{catalog_item_name}'."
                    if msg_update_callback:
                        msg_update_callback.general(msg)
                    LOGGER.debug(msg)
                    cpm.remove_all_compute_policies_from_vapp_template_vms(
                        org_name=org_name,
                        catalog_name=catalog_name,
                        catalog_item_name=catalog_item_name)
        except OperationNotSupportedException:
            msg = "Compute policy not supported by vCD. Skipping " \
                  "assigning/removing it to/from templates."
            if msg_update_callback:
                msg_update_callback.info(msg)
            LOGGER.debug(msg)
    finally:
        # Always release the system session.
        if client:
            client.logout()
def stop(self):
    """Shut down the MQTT consumer.

    Flags the consumer as closing, drains the worker thread pool, then
    disconnects the MQTT client if one exists.
    """
    LOGGER.info("MQTT consumer stopping")
    self._is_closing = True
    # Let jobs finish before disconnecting
    self._ctpe.shutdown(wait=True)
    mqtt_client = self._mqtt_client
    if mqtt_client:
        mqtt_client.disconnect()
def run(self, msg_update_callback=None):
    """Start the CSE server: validate config, load/normalize templates,
    spawn AMQP consumers, then block until shutdown.

    :param msg_update_callback: optional object with general / error /
        general_no_color methods used to surface progress to the console.
    """
    configure_server_logger()

    self.config = get_validated_config(
        self.config_file, msg_update_callback=msg_update_callback)

    populate_vsphere_list(self.config['vcs'])

    # Read k8s catalog definition from catalog item metadata and append
    # the same to to server run-time config
    self._load_template_definition_from_catalog(
        msg_update_callback=msg_update_callback)

    # Read templates rules from config and update template definition in
    # server run-time config
    self._process_template_rules(msg_update_callback=msg_update_callback)

    # Make sure that all vms in templates are compliant with the compute
    # policy specified in template definition (can be affected by rules).
    self._process_template_compute_policy_compliance(
        msg_update_callback=msg_update_callback)

    if self.should_check_config:
        check_cse_installation(self.config,
                               msg_update_callback=msg_update_callback)

    if self.config.get('pks_config'):
        pks_config = self.config.get('pks_config')
        self.pks_cache = PksCache(
            pks_servers=pks_config.get('pks_api_servers', []),
            pks_accounts=pks_config.get('pks_accounts', []),
            pvdcs=pks_config.get('pvdcs', []),
            orgs=pks_config.get('orgs', []),
            nsxt_servers=pks_config.get('nsxt_servers', []))

    # Spawn one daemon consumer thread per configured listener.
    amqp = self.config['amqp']
    num_consumers = self.config['service']['listeners']
    for n in range(num_consumers):
        try:
            c = MessageConsumer(
                amqp['host'], amqp['port'], amqp['ssl'], amqp['vhost'],
                amqp['username'], amqp['password'], amqp['exchange'],
                amqp['routing_key'])
            name = 'MessageConsumer-%s' % n
            t = Thread(name=name, target=consumer_thread, args=(c, ))
            t.daemon = True
            t.start()
            msg = f"Started thread '{name} ({t.ident})'"
            if msg_update_callback:
                msg_update_callback.general(msg)
            LOGGER.info(msg)
            self.threads.append(t)
            self.consumers.append(c)
            time.sleep(0.25)
        except KeyboardInterrupt:
            break
        except Exception:
            # A failed consumer is logged but does not stop the others.
            LOGGER.error(traceback.format_exc())

    LOGGER.info(f"Number of threads started: {len(self.threads)}")

    self._state = ServerState.RUNNING

    message = f"Container Service Extension for vCloud Director" \
              f"\nServer running using config file: {self.config_file}" \
              f"\nLog files: {SERVER_INFO_LOG_FILEPATH}, " \
              f"{SERVER_DEBUG_LOG_FILEPATH}" \
              f"\nwaiting for requests (ctrl+c to close)"
    signal.signal(signal.SIGINT, signal_handler)
    if msg_update_callback:
        msg_update_callback.general_no_color(message)
    LOGGER.info(message)

    # Main wait loop: exit when a stop was requested and all in-flight
    # requests have drained, or on ctrl+c.
    while True:
        try:
            time.sleep(1)
            if self._state == ServerState.STOPPING and \
                    self.active_requests_count() == 0:
                break
        except KeyboardInterrupt:
            break
        except Exception:
            if msg_update_callback:
                msg_update_callback.general_no_color(
                    traceback.format_exc())
            LOGGER.error(traceback.format_exc())
            sys.exit(1)

    LOGGER.info("Stop detected")
    LOGGER.info("Closing connections...")
    for c in self.consumers:
        try:
            c.stop()
        except Exception:
            # Log and keep closing the remaining consumers.
            LOGGER.error(traceback.format_exc())

    self._state = ServerState.STOPPED
    LOGGER.info("Done")
def on_disconnect(mqtt_client, userdata, rc):
    """MQTT callback: record why the client disconnected."""
    LOGGER.info(f'MQTT disconnect with reason: {rc}')
def _create_cluster_async(self, *args,
                          org_name, ovdc_name, cluster_name, cluster_id,
                          template_name, template_revision, num_workers,
                          network_name, num_cpu, mb_memory,
                          storage_profile_name, ssh_key_filepath,
                          enable_nfs, rollback):
    """Asynchronously create a cluster vApp and its nodes.

    Sequence: create the vApp, tag it with cluster metadata, add the
    master node, initialize the cluster, add worker nodes, join them,
    and optionally add an NFS node. Progress is reported through
    self._update_task; on known cluster errors the cluster is deleted
    when rollback is True.

    :param bool enable_nfs: also create an NFS node when True.
    :param bool rollback: delete the partially-created cluster on known
        creation errors when True.
    """
    org = vcd_utils.get_org(self.tenant_client, org_name=org_name)
    vdc = vcd_utils.get_vdc(
        self.tenant_client, vdc_name=ovdc_name, org=org)

    LOGGER.debug(f"About to create cluster {cluster_name} on {ovdc_name}"
                 f" with {num_workers} worker nodes, "
                 f"storage profile={storage_profile_name}")
    try:
        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating cluster vApp {cluster_name}({cluster_id})")
        try:
            vapp_resource = \
                vdc.create_vapp(cluster_name,
                                description=f"cluster {cluster_name}",
                                network=network_name,
                                fence_mode='bridged')
        except Exception as e:
            msg = f"Error while creating vApp: {e}"
            LOGGER.debug(str(e))
            raise ClusterOperationError(msg)
        self.tenant_client.get_task_monitor().wait_for_status(vapp_resource.Tasks.Task[0])  # noqa: E501

        template = get_template(template_name, template_revision)

        # Stamp the vApp with cluster identity/provenance metadata.
        tags = {
            ClusterMetadataKey.CLUSTER_ID: cluster_id,
            ClusterMetadataKey.CSE_VERSION: pkg_resources.require('container-service-extension')[0].version,  # noqa: E501
            ClusterMetadataKey.TEMPLATE_NAME: template[LocalTemplateKey.NAME],  # noqa: E501
            ClusterMetadataKey.TEMPLATE_REVISION: template[LocalTemplateKey.REVISION]  # noqa: E501
        }
        vapp = VApp(self.tenant_client, href=vapp_resource.get('href'))
        task = vapp.set_multiple_metadata(tags)
        self.tenant_client.get_task_monitor().wait_for_status(task)

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating master node for "
                    f"{cluster_name} ({cluster_id})")
        vapp.reload()
        server_config = utils.get_server_runtime_config()
        catalog_name = server_config['broker']['catalog']
        try:
            add_nodes(client=self.tenant_client,
                      num_nodes=1,
                      node_type=NodeType.MASTER,
                      org=org,
                      vdc=vdc,
                      vapp=vapp,
                      catalog_name=catalog_name,
                      template=template,
                      network_name=network_name,
                      num_cpu=num_cpu,
                      memory_in_mb=mb_memory,
                      storage_profile=storage_profile_name,
                      ssh_key_filepath=ssh_key_filepath)
        except Exception as e:
            raise MasterNodeCreationError("Error adding master node:",
                                          str(e))

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Initializing cluster {cluster_name} ({cluster_id})")
        vapp.reload()
        init_cluster(vapp, template[LocalTemplateKey.NAME],
                     template[LocalTemplateKey.REVISION])
        master_ip = get_master_ip(vapp)
        task = vapp.set_metadata('GENERAL', 'READWRITE', 'cse.master.ip',
                                 master_ip)
        self.tenant_client.get_task_monitor().wait_for_status(task)

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Creating {num_workers} node(s) for "
                    f"{cluster_name}({cluster_id})")
        try:
            add_nodes(client=self.tenant_client,
                      num_nodes=num_workers,
                      node_type=NodeType.WORKER,
                      org=org,
                      vdc=vdc,
                      vapp=vapp,
                      catalog_name=catalog_name,
                      template=template,
                      network_name=network_name,
                      num_cpu=num_cpu,
                      memory_in_mb=mb_memory,
                      storage_profile=storage_profile_name,
                      ssh_key_filepath=ssh_key_filepath)
        except Exception as e:
            raise WorkerNodeCreationError("Error creating worker node:",
                                          str(e))

        self._update_task(
            TaskStatus.RUNNING,
            message=f"Adding {num_workers} node(s) to "
                    f"{cluster_name}({cluster_id})")
        vapp.reload()
        join_cluster(vapp, template[LocalTemplateKey.NAME],
                     template[LocalTemplateKey.REVISION])

        if enable_nfs:
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Creating NFS node for "
                        f"{cluster_name} ({cluster_id})")
            try:
                add_nodes(client=self.tenant_client,
                          num_nodes=1,
                          node_type=NodeType.NFS,
                          org=org,
                          vdc=vdc,
                          vapp=vapp,
                          catalog_name=catalog_name,
                          template=template,
                          network_name=network_name,
                          num_cpu=num_cpu,
                          memory_in_mb=mb_memory,
                          storage_profile=storage_profile_name,
                          ssh_key_filepath=ssh_key_filepath)
            except Exception as e:
                raise NFSNodeCreationError("Error creating NFS node:",
                                           str(e))

        self._update_task(
            TaskStatus.SUCCESS,
            message=f"Created cluster {cluster_name} ({cluster_id})")
    except (MasterNodeCreationError, WorkerNodeCreationError,
            NFSNodeCreationError, ClusterJoiningError,
            ClusterInitializationError, ClusterOperationError) as e:
        # Known creation failures: optionally roll the cluster back,
        # then surface the error on the task.
        if rollback:
            msg = f"Error creating cluster {cluster_name}. " \
                  f"Deleting cluster (rollback=True)"
            self._update_task(TaskStatus.RUNNING, message=msg)
            LOGGER.info(msg)
            try:
                cluster = get_cluster(self.tenant_client, cluster_name,
                                      cluster_id=cluster_id,
                                      org_name=org_name,
                                      ovdc_name=ovdc_name)
                self._delete_cluster(cluster_name=cluster_name,
                                     cluster_vdc_href=cluster['vdc_href'])
            except Exception:
                LOGGER.error(f"Failed to delete cluster {cluster_name}",
                             exc_info=True)

        LOGGER.error(f"Error creating cluster {cluster_name}",
                     exc_info=True)
        error_obj = error_to_json(e)
        stack_trace = \
            ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])  # noqa: E501
        self._update_task(
            TaskStatus.ERROR,
            error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
            stack_trace=stack_trace)
        # raising an exception here prints a stacktrace to server console
    except Exception as e:
        # Unexpected failure: report on the task, no rollback attempted.
        LOGGER.error(f"Unknown error creating cluster {cluster_name}",
                     exc_info=True)
        error_obj = error_to_json(e)
        stack_trace = \
            ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])  # noqa: E501
        self._update_task(
            TaskStatus.ERROR,
            error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
            stack_trace=stack_trace)
    finally:
        self.logout_sys_admin_client()
def on_connect(mqtt_client, userdata, flags, rc):
    """MQTT callback: log the connect result and subscribe to the
    listen topic.

    NOTE(review): `self` and `constants` are not parameters here —
    presumably this is a closure defined inside a method, capturing the
    enclosing instance's listen_topic; confirm against the enclosing
    scope.
    """
    LOGGER.info(f'MQTT client connected with result code {rc} and '
                f'flags {flags}')
    mqtt_client.subscribe(self.listen_topic, qos=constants.QOS_LEVEL)
def _create_nodes_async(self, *args,
                        cluster_name, cluster_vdc_href, cluster_vapp_href,
                        cluster_id, template_name, template_revision,
                        num_workers, network_name, num_cpu, mb_memory,
                        storage_profile_name, ssh_key_filepath, enable_nfs,
                        rollback):
    """Asynchronously add worker (or NFS) nodes to an existing cluster.

    Creates `num_workers` nodes from the given template; worker nodes
    are additionally joined to the cluster, NFS nodes are not. Progress
    and errors are reported through self._update_task; on
    NodeCreationError the new nodes are deleted when rollback is True.

    :param bool enable_nfs: create NFS nodes instead of workers.
    :param bool rollback: delete the faulty nodes on NodeCreationError.
    """
    org = vcd_utils.get_org(self.tenant_client)
    vdc = VDC(self.tenant_client, href=cluster_vdc_href)
    vapp = VApp(self.tenant_client, href=cluster_vapp_href)
    template = get_template(name=template_name,
                            revision=template_revision)
    msg = f"Creating {num_workers} node(s) from template " \
          f"'{template_name}' (revision {template_revision}) and " \
          f"adding to {cluster_name} ({cluster_id})"
    LOGGER.debug(msg)
    try:
        self._update_task(TaskStatus.RUNNING, message=msg)

        node_type = NodeType.WORKER
        if enable_nfs:
            node_type = NodeType.NFS

        server_config = utils.get_server_runtime_config()
        catalog_name = server_config['broker']['catalog']
        new_nodes = add_nodes(client=self.tenant_client,
                              num_nodes=num_workers,
                              node_type=node_type,
                              org=org,
                              vdc=vdc,
                              vapp=vapp,
                              catalog_name=catalog_name,
                              template=template,
                              network_name=network_name,
                              num_cpu=num_cpu,
                              memory_in_mb=mb_memory,
                              storage_profile=storage_profile_name,
                              ssh_key_filepath=ssh_key_filepath)

        if node_type == NodeType.NFS:
            # NFS nodes are not joined to the cluster.
            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Created {num_workers} node(s) for "
                        f"{cluster_name}({cluster_id})")
        elif node_type == NodeType.WORKER:
            self._update_task(
                TaskStatus.RUNNING,
                message=f"Adding {num_workers} node(s) to cluster "
                        f"{cluster_name}({cluster_id})")
            target_nodes = []
            for spec in new_nodes['specs']:
                target_nodes.append(spec['target_vm_name'])
            vapp.reload()
            join_cluster(vapp, template[LocalTemplateKey.NAME],
                         template[LocalTemplateKey.REVISION], target_nodes)
            self._update_task(
                TaskStatus.SUCCESS,
                message=f"Added {num_workers} node(s) to cluster "
                        f"{cluster_name}({cluster_id})")
    except NodeCreationError as e:
        # Known node-creation failure: optionally delete the nodes that
        # were created, then surface the error on the task.
        if rollback:
            msg = f"Error adding nodes to {cluster_name} {cluster_id}." \
                  f" Deleting nodes: {e.node_names} (rollback=True)"
            self._update_task(TaskStatus.RUNNING, message=msg)
            LOGGER.info(msg)
            try:
                self._delete_nodes(cluster_name=cluster_name,
                                   cluster_vapp_href=cluster_vapp_href,
                                   node_names_list=e.node_names)
            except Exception:
                LOGGER.error(f"Failed to delete nodes {e.node_names} "
                             f"from cluster {cluster_name}",
                             exc_info=True)
        LOGGER.error(f"Error adding nodes to {cluster_name}",
                     exc_info=True)
        error_obj = error_to_json(e)
        LOGGER.error(str(e), exc_info=True)
        stack_trace = \
            ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])  # noqa: E501
        self._update_task(
            TaskStatus.ERROR,
            error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
            stack_trace=stack_trace)
        # raising an exception here prints a stacktrace to server console
    except Exception as e:
        # Unexpected failure: report on the task, no rollback attempted.
        error_obj = error_to_json(e)
        LOGGER.error(str(e), exc_info=True)
        stack_trace = \
            ''.join(error_obj[ERROR_MESSAGE_KEY][ERROR_STACKTRACE_KEY])  # noqa: E501
        self._update_task(
            TaskStatus.ERROR,
            error_message=error_obj[ERROR_MESSAGE_KEY][ERROR_DESCRIPTION_KEY],  # noqa: E501
            stack_trace=stack_trace)
    finally:
        self.logout_sys_admin_client()
def run(self):
    """Start the CSE server: validate config, spawn AMQP consumers,
    then block until shutdown (ctrl+c or should_stop with no active
    requests).
    """
    self.config = get_validated_config(self.config_file)

    if self.should_check_config:
        check_cse_installation(self.config)

    configure_server_logger()

    message = f"Container Service Extension for vCloudDirector" \
              f"\nServer running using config file: {self.config_file}" \
              f"\nLog files: {SERVER_INFO_LOG_FILEPATH}, " \
              f"{SERVER_DEBUG_LOG_FILEPATH}" \
              f"\nwaiting for requests (ctrl+c to close)"
    signal.signal(signal.SIGINT, signal_handler)
    click.secho(message)
    LOGGER.info(message)

    # Spawn one daemon consumer thread per configured listener.
    amqp = self.config['amqp']
    num_consumers = self.config['service']['listeners']
    for n in range(num_consumers):
        try:
            c = MessageConsumer(
                amqp['host'], amqp['port'], amqp['ssl'], amqp['vhost'],
                amqp['username'], amqp['password'], amqp['exchange'],
                amqp['routing_key'])
            name = 'MessageConsumer-%s' % n
            t = Thread(name=name, target=consumer_thread, args=(c, ))
            t.daemon = True
            t.start()
            LOGGER.info('started thread %s', t.ident)
            self.threads.append(t)
            self.consumers.append(c)
            time.sleep(0.25)
        except KeyboardInterrupt:
            break
        except Exception:
            # A failed consumer is printed but does not stop the others.
            print(traceback.format_exc())

    LOGGER.info('num of threads started: %s', len(self.threads))

    self.is_enabled = True

    # Main wait loop: exit when a stop was requested and all in-flight
    # requests have drained, or on ctrl+c.
    while True:
        try:
            time.sleep(1)
            if self.should_stop and self.active_requests_count() == 0:
                break
        except KeyboardInterrupt:
            break
        except Exception:
            click.secho(traceback.format_exc())
            sys.exit(1)

    LOGGER.info('stop detected')
    LOGGER.info('closing connections...')
    for c in self.consumers:
        try:
            c.stop()
        except Exception:
            # Best-effort shutdown; keep closing the remaining consumers.
            pass

    LOGGER.info('done')