def bay_delete(self, context, uuid):
    """Delete the Heat stack backing a bay, then the bay record itself.

    :param context: request context used to build OpenStack clients
    :param uuid: UUID of the bay to delete
    """
    LOG.debug('bay_heat bay_delete')
    osc = clients.OpenStackClients(context)
    bay = objects.Bay.get_by_uuid(context, uuid)
    stack_id = bay.stack_id
    # NOTE(sdake): This will execute a stack_delete operation.  This will
    # Ignore HTTPNotFound exceptions (stack wasn't present).  In the case
    # that Heat couldn't find the stack representing the bay, likely a user
    # has deleted the stack outside the context of Magnum.  Therefore the
    # contents of the bay are forever lost.
    #
    # If the exception is unhandled, the original exception will be raised.
    try:
        osc.heat().stacks.delete(stack_id)
    except exc.HTTPNotFound:
        # Pass the argument lazily instead of eager '%' interpolation so
        # the message is only formatted when the record is emitted.
        LOG.info(_LI('The stack %s was not be found during bay'
                     ' deletion.'), stack_id)
        try:
            cert_manager.delete_certificates_from_bay(bay)
            bay.destroy()
        except exception.BayNotFound:
            LOG.info(_LI('The bay %s has been deleted by others.'), uuid)
        return None
    except Exception:
        raise

    self._poll_and_check(osc, bay)

    return None
def main():
    """Start the Magnum API WSGI server (wsgiref-based)."""
    service.prepare_service(sys.argv)

    gmr.TextGuruMeditation.setup_autorun(version)

    # Enable object backporting via the conductor
    base.MagnumObject.indirection_api = base.MagnumObjectIndirectionAPI()

    app = api_app.setup_app()

    # Create the WSGI server and start it
    host, port = cfg.CONF.api.host, cfg.CONF.api.port
    srv = simple_server.make_server(host, port, app)

    # Lazy log arguments instead of eager '%' interpolation.
    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, std_logging.DEBUG)

    if host == '0.0.0.0':
        LOG.info(_LI('serving on 0.0.0.0:%(port)s, '
                     'view at http://127.0.0.1:%(port)s'),
                 dict(port=port))
    else:
        LOG.info(_LI('serving on http://%(host)s:%(port)s'),
                 dict(host=host, port=port))

    srv.serve_forever()
def main():
    """Entry point for the Magnum API service (werkzeug-based)."""
    service.prepare_service(sys.argv)

    gmr.TextGuruMeditation.setup_autorun(version)

    # Enable object backporting via the conductor
    base.MagnumObject.indirection_api = base.MagnumObjectIndirectionAPI()

    app = api_app.load_app()

    # Setup OSprofiler for WSGI service
    profiler.setup('magnum-api', CONF.host)

    # SSL configuration
    use_ssl = CONF.api.enabled_ssl

    # Create the WSGI server and start it
    host = CONF.api.host
    port = CONF.api.port

    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    CONF.log_opt_values(LOG, logging.DEBUG)

    scheme = "https" if use_ssl else "http"
    LOG.info(_LI('Serving on %(proto)s://%(host)s:%(port)s'),
             dict(proto=scheme, host=host, port=port))

    serving.run_simple(host, port, app,
                       ssl_context=_get_ssl_configs(use_ssl))
def main():
    """Entry point for the multi-process Magnum API service."""
    service.prepare_service(sys.argv)

    gmr.TextGuruMeditation.setup_autorun(version)

    # Enable object backporting via the conductor
    base.MagnumObject.indirection_api = base.MagnumObjectIndirectionAPI()

    app = api_app.load_app()

    # Setup OSprofiler for WSGI service
    profiler.setup('magnum-api', CONF.host)

    # SSL configuration
    use_ssl = CONF.api.enabled_ssl

    # Create the WSGI server and start it
    host = CONF.api.host
    port = CONF.api.port

    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    CONF.log_opt_values(LOG, logging.DEBUG)

    scheme = "https" if use_ssl else "http"
    LOG.info(_LI('Serving on %(proto)s://%(host)s:%(port)s'),
             dict(proto=scheme, host=host, port=port))

    # Fall back to a CPU-derived worker count when none is configured.
    workers = CONF.api.workers or processutils.get_worker_count()
    LOG.info(_LI('Server will handle each request in a new process up to'
                 ' %s concurrent processes'), workers)

    serving.run_simple(host, port, app, processes=workers,
                       ssl_context=_get_ssl_configs(use_ssl))
def main():
    """Entry point for the Magnum API service (no profiler setup)."""
    service.prepare_service(sys.argv)

    gmr.TextGuruMeditation.setup_autorun(version)

    # Enable object backporting via the conductor
    base.MagnumObject.indirection_api = base.MagnumObjectIndirectionAPI()

    app = api_app.load_app()

    # SSL configuration
    use_ssl = CONF.api.enabled_ssl

    # Create the WSGI server and start it
    host = CONF.api.host
    port = CONF.api.port

    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    CONF.log_opt_values(LOG, logging.DEBUG)

    scheme = "https" if use_ssl else "http"
    LOG.info(_LI('Serving on %(proto)s://%(host)s:%(port)s'),
             dict(proto=scheme, host=host, port=port))

    serving.run_simple(host, port, app,
                       ssl_context=_get_ssl_configs(use_ssl))
def bay_delete(self, context, uuid):
    """Tear down a bay: trust, Heat stack, certificates, then the record."""
    LOG.debug('bay_heat bay_delete')
    osc = clients.OpenStackClients(context)
    bay = objects.Bay.get_by_uuid(context, uuid)

    # Remove the trustee user / trust before touching the stack.
    self._delete_trustee_and_trust(osc, bay)

    stack_id = bay.stack_id
    # NOTE(sdake): This will execute a stack_delete operation.  This will
    # Ignore HTTPNotFound exceptions (stack wasn't present).  In the case
    # that Heat couldn't find the stack representing the bay, likely a user
    # has deleted the stack outside the context of Magnum.  Therefore the
    # contents of the bay are forever lost.
    #
    # If the exception is unhandled, the original exception will be raised.
    try:
        osc.heat().stacks.delete(stack_id)
    except exc.HTTPNotFound:
        LOG.info(_LI('The stack %s was not be found during bay'
                     ' deletion.'), stack_id)
        try:
            cert_manager.delete_certificates_from_bay(bay)
            bay.destroy()
        except exception.BayNotFound:
            LOG.info(_LI('The bay %s has been deleted by others.'), uuid)
        return None
    except Exception:
        raise

    self._poll_and_check(osc, bay)
    return None
def main():
    """Start the Magnum API WSGI server."""
    service.prepare_service(sys.argv)

    gmr.TextGuruMeditation.setup_autorun(version)

    app = api_app.setup_app()

    # Create the WSGI server and start it
    host, port = cfg.CONF.api.host, cfg.CONF.api.port
    srv = simple_server.make_server(host, port, app)

    # Lazy log arguments instead of eager '%' interpolation.
    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, std_logging.DEBUG)

    if host == '0.0.0.0':
        LOG.info(_LI('serving on 0.0.0.0:%(port)s, '
                     'view at http://127.0.0.1:%(port)s'),
                 dict(port=port))
    else:
        LOG.info(_LI('serving on http://%(host)s:%(port)s'),
                 dict(host=host, port=port))

    srv.serve_forever()
def sync_bay_status(self, ctx):
    """Periodic task: reconcile IN_PROGRESS bay statuses with Heat.

    Bays whose stack still exists are updated to the stack's status;
    bays whose stack vanished from Heat are moved to a terminal state
    (or destroyed, when a delete was in progress).
    """
    try:
        LOG.debug('Starting to sync up bay status')
        osc = clients.OpenStackClients(ctx)
        filters = [bay_status.CREATE_IN_PROGRESS,
                   bay_status.UPDATE_IN_PROGRESS,
                   bay_status.DELETE_IN_PROGRESS]
        bays = objects.Bay.list_all(ctx, filters=filters)
        if not bays:
            return
        sid_to_bay_mapping = {bay.stack_id: bay for bay in bays}
        bay_stack_ids = sid_to_bay_mapping.keys()

        # global_tenant: the stacks live in the users' projects, not ours.
        stacks = osc.heat().stacks.list(global_tenant=True,
                                        filters={'id': bay_stack_ids})
        sid_to_stack_mapping = {s.id: s for s in stacks}

        # Stacks found in both sets: mirror Heat's status onto the bay.
        for sid in (six.viewkeys(sid_to_bay_mapping) &
                    six.viewkeys(sid_to_stack_mapping)):
            stack = sid_to_stack_mapping[sid]
            bay = sid_to_bay_mapping[sid]
            if bay.status != stack.stack_status:
                old_status = bay.status
                bay.status = stack.stack_status
                bay.save()
                LOG.info(_LI("Sync up bay with id %(id)s from "
                             "%(old_status)s to %(status)s."),
                         {'id': bay.id, 'old_status': old_status,
                          'status': bay.status})

        # Bays whose stack no longer exists in Heat.
        for sid in (six.viewkeys(sid_to_bay_mapping) -
                    six.viewkeys(sid_to_stack_mapping)):
            bay = sid_to_bay_mapping[sid]
            if bay.status == bay_status.DELETE_IN_PROGRESS:
                bay.destroy()
                LOG.info(_LI("Bay with id %(id)s has been deleted due "
                             "to stack with id %(sid)s not found in "
                             "Heat."),
                         {'id': bay.id, 'sid': sid})
            elif bay.status == bay_status.CREATE_IN_PROGRESS:
                bay.status = bay_status.CREATE_FAILED
                bay.save()
                LOG.info(_LI("Bay with id %(id)s has been set to "
                             "%(status)s due to stack with id %(sid)s "
                             "not found in Heat."),
                         {'id': bay.id, 'status': bay.status,
                          'sid': sid})
            elif bay.status == bay_status.UPDATE_IN_PROGRESS:
                bay.status = bay_status.UPDATE_FAILED
                bay.save()
                LOG.info(_LI("Bay with id %(id)s has been set to "
                             "%(status)s due to stack with id %(sid)s "
                             "not found in Heat."),
                         {'id': bay.id, 'status': bay.status,
                          'sid': sid})
    except Exception as e:
        # Best-effort periodic task: log and swallow so one failure does
        # not kill the timer.  LOG.warn is deprecated; use LOG.warning.
        LOG.warning(_LW("Ignore error [%s] when syncing up bay status."),
                    e, exc_info=True)
def _is_api_ready(self):
    """Probe the Kubernetes API; True once a node-list call succeeds."""
    try:
        self.k8s_api.list_namespaced_node()
    except Exception:
        self.LOG.info(_LI("API is not ready yet."))
        return False
    self.LOG.info(_LI("API is ready."))
    return True
def _delete_complete(self):
    """Finalize bay deletion once its Heat stack is gone."""
    # Lazy log arguments instead of eager '%' interpolation.
    LOG.info(_LI('Bay has been deleted, stack_id: %s'),
             self.bay.stack_id)
    try:
        cert_manager.delete_certificates_from_bay(self.bay)
        self.bay.destroy()
    except exception.BayNotFound:
        LOG.info(_LI('The bay %s has been deleted by others.'),
                 self.bay.uuid)
def _get_nodes(self):
    """Return node addresses, preferring the bay, falling back to Heat."""
    nodes = self._get_nodes_from_bay()
    if not [x for x in nodes if x]:
        self.LOG.info(_LI("the list of nodes from bay is empty"))
        nodes = self._get_nodes_from_stack()
    if not [x for x in nodes if x]:
        self.LOG.info(_LI("the list of nodes from stack is empty"))
    # Lazy log argument instead of eager '%' interpolation.
    self.LOG.info(_LI("Nodes are: %s"), nodes)
    return nodes
def _delete_complete(self):
    """Finalize bay deletion once its Heat stack is gone."""
    # Lazy log arguments instead of eager '%' interpolation.
    LOG.info(_LI('Bay has been deleted, stack_id: %s'),
             self.bay.stack_id)
    try:
        cert_manager.delete_certificates_from_bay(self.bay)
        self.bay.destroy()
    except exception.BayNotFound:
        LOG.info(_LI('The bay %s has been deleted by others.'),
                 self.bay.uuid)
def _sync_deleted_stack(self, bay):
    """Handle a bay whose backing Heat stack no longer exists."""
    try:
        bay.destroy()
    except exception.BayNotFound:
        LOG.info(_LI('The bay %s has been deleted by others.'), bay.uuid)
    else:
        # BUG FIX: the original placeholder '%(reason).' was missing the
        # 's' conversion, which raises ValueError when the message is
        # actually formatted.  Also pass the mapping lazily.
        LOG.info(_LI("Bay with id %(id)s not found in heat "
                     "with stack id %(sid)s, with status_reason: "
                     "%(reason)s."),
                 {'id': bay.id, 'sid': bay.stack_id,
                  'reason': bay.status_reason})
def _delete_complete(self):
    """Finalize bay deletion: trust, certificates, then the bay record."""
    # Lazy log arguments instead of eager '%' interpolation.
    LOG.info(_LI('Bay has been deleted, stack_id: %s'),
             self.bay.stack_id)
    try:
        trust_manager.delete_trustee_and_trust(self.openstack_client,
                                               self.context,
                                               self.bay)
        cert_manager.delete_certificates_from_bay(self.bay)
        self.bay.destroy()
    except exception.BayNotFound:
        LOG.info(_LI('The bay %s has been deleted by others.'),
                 self.bay.uuid)
def _delete_complete(self):
    """Finalize cluster deletion: remove the trust and certificates."""
    # Lazy log arguments instead of eager '%' interpolation.
    LOG.info(_LI('Cluster has been deleted, stack_id: %s'),
             self.cluster.stack_id)
    try:
        trust_manager.delete_trustee_and_trust(self.openstack_client,
                                               self.context,
                                               self.cluster)
        cert_manager.delete_certificates_from_cluster(self.cluster,
                                                      context=self.context)
    except exception.ClusterNotFound:
        LOG.info(_LI('The cluster %s has been deleted by others.'),
                 self.cluster.uuid)
def _sync_deleted_stack(self, cluster):
    """Handle a cluster whose backing Heat stack no longer exists."""
    try:
        cluster.destroy()
    except exception.ClusterNotFound:
        LOG.info(_LI('The cluster %s has been deleted by others.'),
                 cluster.uuid)
    else:
        details = {'id': cluster.id,
                   'sid': cluster.stack_id,
                   'reason': cluster.status_reason}
        LOG.info(_LI("cluster with id %(id)s not found in heat "
                     "with stack id %(sid)s, with status_reason: "
                     "%(reason)s."), details)
def _delete_complete(self):
    """Finalize bay deletion: trust, certificates, then the bay record."""
    # Lazy log arguments instead of eager '%' interpolation.
    LOG.info(_LI('Bay has been deleted, stack_id: %s'),
             self.bay.stack_id)
    try:
        trust_manager.delete_trustee_and_trust(self.openstack_client,
                                               self.context,
                                               self.bay)
        cert_manager.delete_certificates_from_bay(self.bay)
        self.bay.destroy()
    except exception.BayNotFound:
        LOG.info(_LI('The bay %s has been deleted by others.'),
                 self.bay.uuid)
def acquire(self, retry=True):
    """Acquire a lock on the bay.

    :param retry: When True, retry if lock was released while stealing.
    :raises exception.OperationInProgress: when the lock is (still) held
        by a live conductor or was stolen by another conductor first.
    """
    lock_conductor_id = objects.BayLock.create(self.bay.uuid,
                                               self.conductor_id)
    if lock_conductor_id is None:
        # Lazy %-args: only formatted when debug logging is enabled.
        LOG.debug("Conductor %(conductor)s acquired lock on bay "
                  "%(bay)s",
                  {"conductor": self.conductor_id,
                   "bay": self.bay.uuid})
        return

    if (lock_conductor_id == self.conductor_id or
            self.conductor_alive(self.context, lock_conductor_id)):
        LOG.debug("Lock on bay %(bay)s is owned by conductor "
                  "%(conductor)s",
                  {"bay": self.bay.uuid,
                   "conductor": lock_conductor_id})
        raise exception.OperationInProgress(bay_name=self.bay.name)
    else:
        # The holder is dead; try to take the lock over.
        LOG.info(_LI("Stale lock detected on bay %(bay)s. Conductor "
                     "%(conductor)s will attempt to steal the lock"),
                 {"bay": self.bay.uuid,
                  "conductor": self.conductor_id})

        result = objects.BayLock.steal(self.bay.uuid,
                                       lock_conductor_id,
                                       self.conductor_id)
        if result is None:
            LOG.info(_LI("Conductor %(conductor)s successfully stole the "
                         "lock on bay %(bay)s"),
                     {"conductor": self.conductor_id,
                      "bay": self.bay.uuid})
            return
        elif result is True:
            if retry:
                LOG.info(_LI("The lock on bay %(bay)s was released while "
                             "conductor %(conductor)s was stealing it. "
                             "Trying again"),
                         {"bay": self.bay.uuid,
                          "conductor": self.conductor_id})
                return self.acquire(retry=False)
        else:
            # steal() returned the id of whoever beat us to it.
            new_lock_conductor_id = result
            LOG.info(_LI("Failed to steal lock on bay %(bay)s. "
                         "Conductor %(conductor)s stole the lock first"),
                     {"bay": self.bay.uuid,
                      "conductor": new_lock_conductor_id})
            raise exception.OperationInProgress(bay_name=self.bay.name)
def _delete_complete(self):
    """Finalize cluster deletion: remove the trust and certificates."""
    # Lazy log arguments instead of eager '%' interpolation.
    LOG.info(_LI('Cluster has been deleted, stack_id: %s'),
             self.cluster.stack_id)
    try:
        trust_manager.delete_trustee_and_trust(self.openstack_client,
                                               self.context,
                                               self.cluster)
        cert_manager.delete_certificates_from_cluster(self.cluster,
                                                      context=self.context)
    except exception.ClusterNotFound:
        LOG.info(_LI('The cluster %s has been deleted by others.'),
                 self.cluster.uuid)
def _sync_deleted_stack(self, cluster):
    """Destroy a cluster record whose Heat stack has disappeared."""
    try:
        cluster.destroy()
    except exception.ClusterNotFound:
        LOG.info(_LI('The cluster %s has been deleted by others.'),
                 cluster.uuid)
    else:
        LOG.info(_LI("cluster with id %(id)s not found in heat "
                     "with stack id %(sid)s, with status_reason: "
                     "%(reason)s."),
                 {'id': cluster.id,
                  'sid': cluster.stack_id,
                  'reason': cluster.status_reason})
def main():
    """Entry point for the magnum-conductor service."""
    magnum_service.prepare_service(sys.argv)

    gmr.TextGuruMeditation.setup_autorun(version)

    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, logging.DEBUG)

    cfg.CONF.import_opt('topic', 'magnum.conductor.config',
                        group='conductor')

    conductor_id = short_id.generate_id()
    # RPC endpoints served by this conductor.
    handlers = [
        indirection_api.Handler(),
        docker_conductor.Handler(),
        k8s_conductor.Handler(),
        bay_conductor.Handler(),
        conductor_listener.Handler(),
        ca_conductor.Handler(),
    ]

    server = rpc_service.Service.create(cfg.CONF.conductor.topic,
                                        conductor_id, handlers,
                                        binary='magnum-conductor')
    launcher = service.launch(cfg.CONF, server)
    launcher.wait()
def bay_delete(self, context, uuid):
    """Delete the Heat stack backing a bay, then the bay record.

    A missing stack (deleted outside Magnum) is treated as already
    deleted; any other Heat error propagates to the caller.
    """
    LOG.debug("bay_heat bay_delete")
    osc = clients.OpenStackClients(context)
    bay = objects.Bay.get_by_uuid(context, uuid)
    stack_id = bay.stack_id
    try:
        osc.heat().stacks.delete(stack_id)
    except exc.HTTPNotFound:
        # Catch the specific exception directly rather than an
        # ``except Exception`` + isinstance() check; other exceptions
        # now propagate naturally, which matches the original re-raise.
        # Also pass the log argument lazily instead of eager '%'.
        LOG.info(_LI("The stack %s was not be found during bay"
                     " deletion."), stack_id)
        bay.destroy()
        return None

    self._poll_and_check(osc, bay)

    return None
def get_cert(cert_ref, service_name='Magnum', resource_ref=None,
             check_only=False, **kwargs):
    """Retrieves the specified cert and registers as a consumer.

    :param cert_ref: the UUID of the cert to retrieve
    :param service_name: Friendly name for the consuming service
    :param resource_ref: Full HATEOAS reference to the consuming resource
    :param check_only: Read Certificate data without registering
    :return: Magnum.certificates.common.Cert representation of the
             certificate data
    :raises Exception: if certificate retrieval fails
    """
    connection = get_admin_clients().barbican()
    # Translated log messages take lazy %-style arguments; eager
    # .format() defeats both deferred formatting and translation.
    LOG.info(_LI("Loading certificate container %s from Barbican."),
             cert_ref)
    try:
        if check_only:
            cert_container = connection.containers.get(
                container_ref=cert_ref
            )
        else:
            cert_container = connection.containers.register_consumer(
                container_ref=cert_ref,
                name=service_name,
                url=resource_ref
            )
        return Cert(cert_container)
    except Exception:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Error getting %s"), cert_ref)
def main():
    """Entry point for the magnum-conductor service."""
    magnum_service.prepare_service(sys.argv)

    gmr.TextGuruMeditation.setup_autorun(version)

    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, logging.DEBUG)

    cfg.CONF.import_opt('topic', 'magnum.conductor.config',
                        group='conductor')

    conductor_id = short_id.generate_id()
    endpoints = [
        indirection_api.Handler(),
        docker_conductor.Handler(),
        k8s_conductor.Handler(),
        bay_conductor.Handler(),
        conductor_listener.Handler(),
        ca_conductor.Handler(),
    ]

    # Warn early when neither Heat template is installed; bay creation
    # would fail later otherwise.  Lazy log args instead of eager '%'.
    if (not os.path.isfile(cfg.CONF.bay.k8s_atomic_template_path) and
            not os.path.isfile(cfg.CONF.bay.k8s_coreos_template_path)):
        LOG.error(_LE("The Heat template can not be found for either k8s "
                      "atomic %(atomic_template)s or coreos "
                      "%(coreos_template)s. Install template first if you "
                      "want to create bay."),
                  {'atomic_template': cfg.CONF.bay.k8s_atomic_template_path,
                   'coreos_template': cfg.CONF.bay.k8s_coreos_template_path})

    server = rpc_service.Service.create(cfg.CONF.conductor.topic,
                                        conductor_id, endpoints,
                                        binary='magnum-conductor')
    launcher = service.launch(cfg.CONF, server)
    launcher.wait()
def delete_cert(cert_ref, service_name='Magnum', resource_ref=None,
                **kwargs):
    """Deregister as a consumer for the specified cert.

    :param cert_ref: the UUID of the cert to retrieve
    :param service_name: Friendly name for the consuming service
    :param resource_ref: Full HATEOAS reference to the consuming resource
    :raises Exception: if deregistration fails
    """
    connection = get_admin_clients().barbican()
    # Lazy %-style log argument instead of eager .format().
    LOG.info(_LI("Deregistering as a consumer of %s in Barbican."),
             cert_ref)
    try:
        connection.containers.remove_consumer(
            container_ref=cert_ref,
            name=service_name,
            url=resource_ref
        )
    except Exception:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Error deregistering as a consumer of %s"),
                          cert_ref)
def delete_cert(cert_ref, service_name='Magnum', resource_ref=None,
                **kwargs):
    """Deletes the specified cert.

    :param cert_ref: the UUID of the cert to delete
    :raises Exception: if certificate deletion fails
    """
    connection = get_admin_clients().barbican()
    # Lazy %-style log argument instead of eager .format().
    LOG.info(_LI("Recursively deleting certificate container %s "
                 "from Barbican."), cert_ref)
    try:
        certificate_container = connection.containers.get(cert_ref)
        certificate_container.certificate.delete()
        if certificate_container.intermediates:
            certificate_container.intermediates.delete()
        if certificate_container.private_key_passphrase:
            certificate_container.private_key_passphrase.delete()
        certificate_container.private_key.delete()
        certificate_container.delete()
    except barbican_exc.HTTPClientError:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Error recursively deleting certificate "
                              "container %s"), cert_ref)
def get_cert(cert_ref, service_name='Magnum', resource_ref=None,
             check_only=False, **kwargs):
    """Retrieves the specified cert and registers as a consumer.

    :param cert_ref: the UUID of the cert to retrieve
    :param service_name: Friendly name for the consuming service
    :param resource_ref: Full HATEOAS reference to the consuming resource
    :param check_only: Read Certificate data without registering
    :return: Magnum.certificates.common.Cert representation of the
             certificate data
    :raises Exception: if certificate retrieval fails
    """
    connection = get_admin_clients().barbican()
    # Lazy %-style log argument instead of eager .format().
    LOG.info(_LI("Loading certificate container %s from Barbican."),
             cert_ref)
    try:
        if check_only:
            cert_container = connection.containers.get(
                container_ref=cert_ref
            )
        else:
            cert_container = connection.containers.register_consumer(
                container_ref=cert_ref,
                name=service_name,
                url=resource_ref
            )
        return Cert(cert_container)
    except barbican_exc.HTTPClientError:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Error getting %s"), cert_ref)
def _actually_delete_cert(cert_ref):
    """Deletes the specified cert. Very dangerous. Do not recommend.

    :param cert_ref: the UUID of the cert to delete
    :raises Exception: if certificate deletion fails
    """
    connection = get_admin_clients().barbican()
    # Lazy %-style log argument instead of eager .format().
    LOG.info(_LI("Recursively deleting certificate container %s "
                 "from Barbican."), cert_ref)
    try:
        certificate_container = connection.containers.get(cert_ref)
        certificate_container.certificate.delete()
        if certificate_container.intermediates:
            certificate_container.intermediates.delete()
        if certificate_container.private_key_passphrase:
            certificate_container.private_key_passphrase.delete()
        certificate_container.private_key.delete()
        certificate_container.delete()
    except Exception:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Error recursively deleting certificate "
                              "container %s"), cert_ref)
def get_removal_nodes(self, hosts_output):
    """Pick the nodes to remove on scale-down, preferring empty hosts.

    :param hosts_output: helper exposing the Heat output key that lists
        the cluster's hosts
    :return: list of host addresses to remove (empty when not scaling
        down)
    :raises exception.MagnumException: when the expected output key is
        missing from the stack
    """
    if not self._is_scale_down():
        # Idiomatic empty-list literal instead of ``list()``.
        return []

    cluster = self.new_cluster
    stack = self.osclient.heat().stacks.get(cluster.stack_id)
    hosts = hosts_output.get_output_value(stack)
    if hosts is None:
        raise exception.MagnumException(_(
            "Output key '%(output_key)s' is missing from stack "
            "%(stack_id)s") % {'output_key': hosts_output.heat_output,
                               'stack_id': stack.id})

    hosts_with_container = self._get_hosts_with_container(self.context,
                                                          cluster)
    hosts_no_container = list(set(hosts) - hosts_with_container)
    LOG.debug('List of hosts that has no container: %s',
              str(hosts_no_container))

    num_of_removal = self._get_num_of_removal()
    if len(hosts_no_container) < num_of_removal:
        # Not enough empty hosts; some busy nodes will be removed too.
        LOG.warning(_LW(
            "About to remove %(num_removal)d nodes, which is larger than "
            "the number of empty nodes (%(num_empty)d). %(num_non_empty)d "
            "non-empty nodes will be removed."), {
                'num_removal': num_of_removal,
                'num_empty': len(hosts_no_container),
                'num_non_empty': num_of_removal - len(hosts_no_container)})

    hosts_to_remove = hosts_no_container[0:num_of_removal]
    LOG.info(_LI('Require removal of hosts: %s'), hosts_to_remove)

    return hosts_to_remove
def _actually_delete_cert(cert_ref):
    """Deletes the specified cert. Very dangerous. Do not recommend.

    :param cert_ref: the UUID of the cert to delete
    :raises Exception: if certificate deletion fails
    """
    connection = get_admin_clients().barbican()
    # Lazy %-style log argument instead of eager .format().
    LOG.info(_LI("Recursively deleting certificate container %s "
                 "from Barbican."), cert_ref)
    try:
        certificate_container = connection.containers.get(cert_ref)
        certificate_container.certificate.delete()
        if certificate_container.intermediates:
            certificate_container.intermediates.delete()
        if certificate_container.private_key_passphrase:
            certificate_container.private_key_passphrase.delete()
        certificate_container.private_key.delete()
        certificate_container.delete()
    except Exception:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Error recursively deleting certificate "
                              "container %s"), cert_ref)
def do_copy_logs(prefix, nodes_address):
    """Copy logs from each node address via the copy-logs script.

    Failures for one node are logged and do not stop the others.
    """
    if not nodes_address:
        return

    msg = _LI("copy logs from : %s") % ','.join(nodes_address)
    cls.LOG.info(msg)
    log_name = prefix + "-" + func_name
    for node_address in nodes_address:
        try:
            # Lazy %-args instead of eager '%' interpolation.
            cls.LOG.debug("running %s", full_location)
            cls.LOG.debug("keypair: %s", keypair)
            subprocess.check_call([
                full_location,
                node_address,
                coe,
                log_name,
                str(keypair)
            ])
        except Exception:
            # BUG FIX: the original logged the stale "copy logs from"
            # message at ERROR level before building the real failure
            # message; construct the failure message first and log it
            # once with the traceback.
            msg = (_LE("failed to copy from %(node_address)s "
                       "to %(base_path)s%(log_name)s-"
                       "%(node_address)s") %
                   {'node_address': node_address,
                    'base_path': "/opt/stack/logs/bay-nodes/",
                    'log_name': log_name})
            cls.LOG.exception(msg)
def main():
    """Entry point for the magnum conductor service."""
    logging.register_options(cfg.CONF)
    cfg.CONF(sys.argv[1:], project='magnum')
    logging.setup(cfg.CONF, 'magnum')

    # Lazy log arguments instead of eager '%' interpolation.
    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, std_logging.DEBUG)

    cfg.CONF.import_opt('topic', 'magnum.conductor.config',
                        group='conductor')

    conductor_id = short_id.generate_id()
    endpoints = [
        docker_conductor.Handler(),
        k8s_conductor.Handler(),
        bay_conductor.Handler(),
        conductor_listener.Handler(),
    ]

    # BUG FIX: the placeholder '(coreos_template)%s' was malformed and
    # would render incorrectly; it must be '%(coreos_template)s'.
    if (not os.path.isfile(cfg.CONF.bay.k8s_atomic_template_path) and
            not os.path.isfile(cfg.CONF.bay.k8s_coreos_template_path)):
        LOG.error(_LE("The Heat template can not be found for either k8s "
                      "atomic %(atomic_template)s or coreos "
                      "%(coreos_template)s. Install template first if you "
                      "want to create bay."),
                  {'atomic_template': cfg.CONF.bay.k8s_atomic_template_path,
                   'coreos_template': cfg.CONF.bay.k8s_coreos_template_path})

    server = service.Service(cfg.CONF.conductor.topic, conductor_id,
                             endpoints)
    server.serve()
def main():
    """Entry point for the magnum-conductor service (cluster handlers)."""
    magnum_service.prepare_service(sys.argv)

    gmr.TextGuruMeditation.setup_autorun(version)

    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, logging.DEBUG)

    cfg.CONF.import_opt('topic', 'magnum.conductor.config',
                        group='conductor')

    conductor_id = short_id.generate_id()
    # RPC endpoints served by this conductor.
    handlers = [
        indirection_api.Handler(),
        cluster_conductor.Handler(),
        conductor_listener.Handler(),
        ca_conductor.Handler(),
    ]

    server = rpc_service.Service.create(cfg.CONF.conductor.topic,
                                        conductor_id, handlers,
                                        binary='magnum-conductor')
    launcher = service.launch(cfg.CONF, server)
    launcher.wait()
def delete_cert(cert_ref, service_name='Magnum', resource_ref=None,
                **kwargs):
    """Deregister as a consumer for the specified cert.

    :param cert_ref: the UUID of the cert to retrieve
    :param service_name: Friendly name for the consuming service
    :param resource_ref: Full HATEOAS reference to the consuming resource
    :raises Exception: if deregistration fails
    """
    connection = get_admin_clients().barbican()
    # Lazy %-style log argument instead of eager .format().
    LOG.info(_LI("Deregistering as a consumer of %s in Barbican."),
             cert_ref)
    try:
        connection.containers.remove_consumer(container_ref=cert_ref,
                                              name=service_name,
                                              url=resource_ref)
    except Exception:
        with excutils.save_and_reraise_exception():
            LOG.exception(_LE("Error deregistering as a consumer of %s"),
                          cert_ref)
def main():
    """Entry point for the magnum-conductor service."""
    magnum_service.prepare_service(sys.argv)

    gmr.TextGuruMeditation.setup_autorun(version)

    # Lazy log arguments instead of eager '%' interpolation.
    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, std_logging.DEBUG)

    cfg.CONF.import_opt('topic', 'magnum.conductor.config',
                        group='conductor')

    conductor_id = short_id.generate_id()
    endpoints = [
        docker_conductor.Handler(),
        k8s_conductor.Handler(),
        bay_conductor.Handler(),
        x509keypair_conductor.Handler(),
        conductor_listener.Handler(),
        ca_conductor.Handler(),
    ]

    # BUG FIX: the placeholder '(coreos_template)%s' was malformed and
    # would render incorrectly; it must be '%(coreos_template)s'.
    if (not os.path.isfile(cfg.CONF.bay.k8s_atomic_template_path) and
            not os.path.isfile(cfg.CONF.bay.k8s_coreos_template_path)):
        LOG.error(_LE("The Heat template can not be found for either k8s "
                      "atomic %(atomic_template)s or coreos "
                      "%(coreos_template)s. Install template first if you "
                      "want to create bay."),
                  {'atomic_template': cfg.CONF.bay.k8s_atomic_template_path,
                   'coreos_template': cfg.CONF.bay.k8s_coreos_template_path})

    server = rpc_service.Service.create(cfg.CONF.conductor.topic,
                                        conductor_id, endpoints)
    launcher = service.launch(cfg.CONF, server)
    launcher.wait()
def cluster_delete(self, context, uuid):
    """Delete a cluster's Heat stack and emit deletion notifications."""
    LOG.debug('cluster_heat cluster_delete')
    osc = clients.OpenStackClients(context)
    cluster = objects.Cluster.get_by_uuid(context, uuid)
    stack_id = cluster.stack_id
    # NOTE(sdake): This will execute a stack_delete operation.  This will
    # Ignore HTTPNotFound exceptions (stack wasn't present).  In the case
    # that Heat couldn't find the stack representing the cluster, likely a
    # user has deleted the stack outside the context of Magnum.  Therefore
    # the contents of the cluster are forever lost.
    #
    # If the exception is unhandled, the original exception will be raised.
    try:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING)
        osc.heat().stacks.delete(stack_id)
    except exc.HTTPNotFound:
        # Stack already gone: clean up everything locally and report
        # success.
        LOG.info(_LI('The stack %s was not found during cluster'
                     ' deletion.'), stack_id)
        try:
            trust_manager.delete_trustee_and_trust(osc, context, cluster)
            cert_manager.delete_certificates_from_cluster(cluster,
                                                          context=context)
            cluster.destroy()
        except exception.ClusterNotFound:
            LOG.info(_LI('The cluster %s has been deleted by others.'),
                     uuid)
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS)
        return None
    except exc.HTTPConflict:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
        raise exception.OperationInProgress(cluster_name=cluster.name)
    except Exception:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
        raise

    cluster.status = fields.ClusterStatus.DELETE_IN_PROGRESS
    cluster.save()

    self._poll_and_check(osc, cluster)
    return None
def poll_and_check(self):
    """Poll the bay's Heat stack and sync bay status until terminal.

    Raises ``loopingcall.LoopingCallDone`` to stop the polling loop
    once the stack reaches a terminal state or the attempt budget is
    exhausted.
    """
    # TODO(yuanying): temporary implementation to update api_address,
    # node_addresses and bay status
    stack = self.openstack_client.heat().stacks.get(self.bay.stack_id)
    self.attempts += 1
    # poll_and_check is detached and polling long time to check status,
    # so another user/client can call delete bay/stack.
    if stack.stack_status == 'DELETE_COMPLETE':
        # Lazy log arguments instead of eager '%' interpolation.
        LOG.info(_LI('Bay has been deleted, stack_id: %s'),
                 self.bay.stack_id)
        self.bay.destroy()
        raise loopingcall.LoopingCallDone()
    if stack.stack_status in ['CREATE_COMPLETE', 'UPDATE_COMPLETE']:
        _update_stack_outputs(self.context, stack, self.bay)
        self.bay.status = stack.stack_status
        self.bay.save()
        raise loopingcall.LoopingCallDone()
    elif stack.stack_status != self.bay.status:
        self.bay.status = stack.stack_status
        self.bay.save()
    if stack.stack_status == 'CREATE_FAILED':
        LOG.error(_LE('Unable to create bay, stack_id: %(stack_id)s, '
                      'reason: %(reason)s'),
                  {'stack_id': self.bay.stack_id,
                   'reason': stack.stack_status_reason})
        raise loopingcall.LoopingCallDone()
    if stack.stack_status == 'DELETE_FAILED':
        LOG.error(_LE('Unable to delete bay, stack_id: %(stack_id)s, '
                      'reason: %(reason)s'),
                  {'stack_id': self.bay.stack_id,
                   'reason': stack.stack_status_reason})
        raise loopingcall.LoopingCallDone()
    if stack.stack_status == 'UPDATE_FAILED':
        LOG.error(_LE('Unable to update bay, stack_id: %(stack_id)s, '
                      'reason: %(reason)s'),
                  {'stack_id': self.bay.stack_id,
                   'reason': stack.stack_status_reason})
        raise loopingcall.LoopingCallDone()
    # only check max attempts when the stack is being created when
    # the timeout hasn't been set. If the timeout has been set then
    # the loop will end when the stack completes or the timeout occurs
    if stack.stack_status == 'CREATE_IN_PROGRESS':
        if (stack.timeout_mins is None and
                self.attempts > cfg.CONF.k8s_heat.max_attempts):
            LOG.error(_LE('Bay check exit after %(attempts)s attempts,'
                          'stack_id: %(id)s, stack_status: %(status)s'),
                      {'attempts': cfg.CONF.k8s_heat.max_attempts,
                       'id': self.bay.stack_id,
                       'status': stack.stack_status})
            raise loopingcall.LoopingCallDone()
    else:
        if self.attempts > cfg.CONF.k8s_heat.max_attempts:
            LOG.error(_LE('Bay check exit after %(attempts)s attempts,'
                          'stack_id: %(id)s, stack_status: %(status)s'),
                      {'attempts': cfg.CONF.k8s_heat.max_attempts,
                       'id': self.bay.stack_id,
                       'status': stack.stack_status})
            raise loopingcall.LoopingCallDone()
def cluster_delete(self, context, uuid):
    """Request deletion of a cluster's stack, notifying on each outcome."""
    LOG.debug('cluster_heat cluster_delete')
    osc = clients.OpenStackClients(context)
    cluster = objects.Cluster.get_by_uuid(context, uuid)
    stack_id = cluster.stack_id
    # NOTE(sdake): This will execute a stack_delete operation.  This will
    # Ignore HTTPNotFound exceptions (stack wasn't present).  In the case
    # that Heat couldn't find the stack representing the cluster, likely a
    # user has deleted the stack outside the context of Magnum.  Therefore
    # the contents of the cluster are forever lost.
    #
    # If the exception is unhandled, the original exception will be raised.
    try:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING)
        osc.heat().stacks.delete(stack_id)
    except exc.HTTPNotFound:
        LOG.info(_LI('The stack %s was not found during cluster'
                     ' deletion.'), stack_id)
        try:
            trust_manager.delete_trustee_and_trust(osc, context, cluster)
            cert_manager.delete_certificates_from_cluster(cluster,
                                                          context=context)
            cluster.destroy()
        except exception.ClusterNotFound:
            LOG.info(_LI('The cluster %s has been deleted by others.'),
                     uuid)
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS)
        return None
    except exc.HTTPConflict:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
        raise exception.OperationInProgress(cluster_name=cluster.name)
    except Exception:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
        raise

    # Delete request accepted by Heat; poll until it completes.
    cluster.status = fields.ClusterStatus.DELETE_IN_PROGRESS
    cluster.save()
    self._poll_and_check(osc, cluster)
    return None
def setup(binary, host):
    """Initialize OSprofiler for this service when profiling is enabled.

    :param binary: service binary name reported to OSprofiler.
    :param host: hostname reported to OSprofiler.
    """
    # Older configs may lack a [profiler] section entirely; treat that
    # the same as profiling being disabled.
    profiling_on = hasattr(CONF, 'profiler') and CONF.profiler.enabled
    if not profiling_on:
        return
    profiler_initializer.init_from_conf(
        conf=CONF,
        context=context.get_admin_context().to_dict(),
        project="magnum",
        service=binary,
        host=host)
    LOG.info(_LI("OSprofiler is enabled."))
def _container_operation(self, func, *args, **kwargs):
    """Invoke a Swarm API callable with retries on transient failures.

    :param func: the Swarm client callable to invoke.
    :returns: whatever ``func`` returns on success.
    :raises Exception: if every retry attempt fails.
    """
    # NOTE(hongbin): Swarm cluster occasionally aborts the connection,
    # so we re-try the operation several times here. In long-term, we
    # need to investigate the cause of this issue. See bug #1583337.
    for _ in range(150):
        try:
            # Lazy %-args instead of concatenating onto the translated
            # string: oslo.i18n forbids composing _LI() messages with +,
            # and lazy args defer interpolation until the level is enabled.
            self.LOG.info(_LI("Calling function %s"), func.__name__)
            return func(*args, **kwargs)
        except req_exceptions.ConnectionError:
            self.LOG.info(_LI("Connection aborted on calling Swarm API. "
                              "Will retry in 2 seconds."))
        except errors.APIError as e:
            # Only retry on server-side (500) errors; client errors are
            # permanent and must propagate.
            if e.response.status_code != 500:
                raise
            self.LOG.info(_LI("Internal Server Error: %s"), e)
        time.sleep(2)
    raise Exception("Cannot connect to Swarm API.")
def setup(binary, host):
    """Enable OSprofiler for this service if configured to do so.

    :param binary: service binary name reported to OSprofiler.
    :param host: hostname reported to OSprofiler.
    """
    if not CONF.profiler.enabled:
        return
    profiler_initializer.init_from_conf(
        conf=CONF,
        context=context.get_admin_context().to_dict(),
        project="magnum",
        service=binary,
        host=host)
    LOG.info(_LI("OSprofiler is enabled."))
def _sync_missing_stack(self, new_status):
    """Force the tracked cluster to *new_status* because its Heat stack
    no longer exists, persisting the change and logging the transition.

    :param new_status: terminal status to record on the cluster.
    """
    cluster = self.cluster
    cluster.status = new_status
    cluster.status_reason = _("Stack with id %s not found in "
                              "Heat.") % cluster.stack_id
    cluster.save()
    log_values = {'id': cluster.id,
                  'status': cluster.status,
                  'sid': cluster.stack_id}
    LOG.info(_LI("Cluster with id %(id)s has been set to "
                 "%(status)s due to stack with id %(sid)s "
                 "not found in Heat."), log_values)
def _container_operation(self, func, *args, **kwargs):
    """Call a Swarm API function, retrying transient failures.

    :param func: the Swarm client callable to invoke.
    :returns: whatever ``func`` returns on success.
    :raises Exception: if the API stays unreachable after all retries.
    """
    # NOTE(hongbin): Swarm cluster occasionally aborts the connection,
    # so we re-try the operation several times here. In long-term, we
    # need to investigate the cause of this issue. See bug #1583337.
    for _ in range(150):
        try:
            # Use lazy %-style log args rather than concatenating onto
            # the translated _LI() string (oslo.i18n guideline): the
            # message is only interpolated if the log level is enabled.
            self.LOG.info(_LI("Calling function %s"), func.__name__)
            return func(*args, **kwargs)
        except req_exceptions.ConnectionError:
            self.LOG.info(
                _LI("Connection aborted on calling Swarm API. "
                    "Will retry in 2 seconds."))
        except errors.APIError as e:
            # Retry only on 500s; other API errors are permanent.
            if e.response.status_code != 500:
                raise
            self.LOG.info(_LI("Internal Server Error: %s"), e)
        time.sleep(2)
    raise Exception("Cannot connect to Swarm API.")
def _sync_missing_stack(self, bay, new_status):
    """Record *new_status* on a bay whose backing Heat stack is gone.

    Persists the status change and logs the transition.

    :param bay: bay object to update.
    :param new_status: terminal status to record on the bay.
    """
    bay.status = new_status
    reason = _("Stack with id %s not found in "
               "Heat.") % bay.stack_id
    bay.status_reason = reason
    bay.save()
    LOG.info(_LI("Bay with id %(id)s has been set to "
                 "%(status)s due to stack with id %(sid)s "
                 "not found in Heat."),
             {'id': bay.id,
              'status': bay.status,
              'sid': bay.stack_id})
def _sync_existing_bay(self, bay, stack):
    """Copy the Heat stack's status onto the bay when they diverge.

    :param bay: bay object to synchronize.
    :param stack: Heat stack object that is the source of truth.
    """
    # Nothing to do when the bay already mirrors the stack.
    if bay.status == stack.stack_status:
        return
    previous_status = bay.status
    bay.status = stack.stack_status
    bay.status_reason = stack.stack_status_reason
    bay.save()
    LOG.info(_LI("Sync up bay with id %(id)s from "
                 "%(old_status)s to %(status)s."),
             {'id': bay.id,
              'old_status': previous_status,
              'status': bay.status})
def cluster_delete(self, context, uuid):
    """Delete a cluster via its COE driver, then the cluster record.

    :param context: request context used for OpenStack client auth and
        DB access.
    :param uuid: UUID of the cluster to delete.
    :returns: None in all non-raising paths.
    :raises OperationInProgress: if Heat reports a conflicting operation
        already running on the stack.
    """
    LOG.debug('cluster_heat cluster_delete')
    osc = clients.OpenStackClients(context)
    cluster = objects.Cluster.get_by_uuid(context, uuid)
    # Resolve the driver from the cluster template's (server_type,
    # distro, coe) triple so stack deletion goes through the right COE.
    ct = conductor_utils.retrieve_cluster_template(context, cluster)
    cluster_driver = driver.Driver.get_driver(ct.server_type,
                                              ct.cluster_distro,
                                              ct.coe)
    try:
        # Notify PENDING before the driver call so listeners see the
        # attempt even if the stack delete itself fails.
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING)
        cluster_driver.delete_stack(context, osc, cluster)
    except exc.HTTPNotFound:
        LOG.info(
            _LI('The stack %s was not found during cluster'
                ' deletion.'), cluster.stack_id)
        # Stack already gone (likely deleted outside Magnum): clean up
        # trusts/certs and destroy the cluster record directly.
        try:
            trust_manager.delete_trustee_and_trust(osc, context, cluster)
            cert_manager.delete_certificates_from_cluster(cluster,
                                                          context=context)
            cluster.destroy()
        except exception.ClusterNotFound:
            # Someone else removed the record concurrently; treat as done.
            LOG.info(_LI('The cluster %s has been deleted by others.'),
                     uuid)
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS)
        return None
    except exc.HTTPConflict:
        # Heat refused because another stack operation is in flight.
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
        raise exception.OperationInProgress(cluster_name=cluster.name)
    except Exception:
        # Unknown failure: notify and re-raise the original exception.
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
        raise
    # Delete accepted: mark the cluster and poll Heat until it finishes.
    cluster.status = fields.ClusterStatus.DELETE_IN_PROGRESS
    cluster.save()
    self._poll_and_check(osc, cluster, cluster_driver)
    return None
def bay_delete(self, context, uuid):
    """Delete the Heat stack backing a bay, then the bay record.

    :param context: request context used for OpenStack client auth and
        DB access.
    :param uuid: UUID of the bay to delete.
    :returns: None in all non-raising paths.
    :raises OperationInProgress: if Heat reports a conflicting operation
        already running on the stack.
    """
    LOG.debug('bay_heat bay_delete')
    osc = clients.OpenStackClients(context)
    bay = objects.Bay.get_by_uuid(context, uuid)
    stack_id = bay.stack_id
    # NOTE(sdake): This will execute a stack_delete operation. This will
    # Ignore HTTPNotFound exceptions (stack wasn't present). In the case
    # that Heat couldn't find the stack representing the bay, likely a user
    # has deleted the stack outside the context of Magnum. Therefore the
    # contents of the bay are forever lost.
    #
    # If the exception is unhandled, the original exception will be raised.
    try:
        conductor_utils.notify_about_bay_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_PENDING)
        osc.heat().stacks.delete(stack_id)
    except exc.HTTPNotFound:
        # Message grammar fixed ("was not be found" -> "was not found"),
        # matching the wording used by the cluster deletion path.
        LOG.info(
            _LI('The stack %s was not found during bay'
                ' deletion.'), stack_id)
        # Stack already gone: clean up trusts/certs and destroy the bay
        # record directly instead of polling Heat.
        try:
            trust_manager.delete_trustee_and_trust(osc, context, bay)
            cert_manager.delete_certificates_from_bay(bay, context=context)
            bay.destroy()
        except exception.BayNotFound:
            # Someone else removed the record concurrently; treat as done.
            LOG.info(_LI('The bay %s has been deleted by others.'), uuid)
        conductor_utils.notify_about_bay_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_SUCCESS)
        return None
    except exc.HTTPConflict:
        conductor_utils.notify_about_bay_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
        raise exception.OperationInProgress(bay_name=bay.name)
    except Exception:
        conductor_utils.notify_about_bay_operation(
            context, taxonomy.ACTION_DELETE, taxonomy.OUTCOME_FAILURE)
        raise
    self._poll_and_check(osc, bay)
    return None
def is_cluster_in_final_state(self, cluster_id):
    """Return True if the cluster has reached a terminal status.

    :param cluster_id: identifier of the cluster to check.
    :returns: True when the cluster is in a final (success OR failure)
        status; False otherwise, including when the cluster is missing.
    """
    try:
        resp, model = self.get_cluster(cluster_id)
        # Both success and failure statuses count as "final" here.
        if model.status in ('CREATED', 'CREATE_COMPLETE',
                            'ERROR', 'CREATE_FAILED'):
            # Lazy log args instead of eager %-formatting, and a message
            # that no longer claims "succeeded" for ERROR/CREATE_FAILED.
            self.LOG.info(_LI('Cluster %s is in a final state.'),
                          cluster_id)
            return True
        return False
    except exceptions.NotFound:
        self.LOG.warning(_LW('Cluster %s is not found.'), cluster_id)
        return False
def _service_admin_creds(self):
    """Build the service admin credential dict for Keystone v3 auth.

    :returns: dict with username, password, auth_url, endpoint and
        project_name pulled from [keystone_authtoken] config.
    """
    # Import auth_token to have keystone_authtoken settings setup.
    importutils.import_module('keystonemiddleware.auth_token')
    creds = {
        'username': cfg.CONF.keystone_authtoken.admin_user,
        'password': cfg.CONF.keystone_authtoken.admin_password,
        'auth_url': self.v3_endpoint,
        'endpoint': self.v3_endpoint,
        'project_name': cfg.CONF.keystone_authtoken.admin_tenant_name
    }
    # SECURITY: never write the admin password to the log. Log a masked
    # copy instead, and use lazy %-args rather than eager formatting.
    masked_creds = dict(creds, password='***')
    LOG.info(_LI('admin creds %s'), masked_creds)
    return creds
def load_app():
    """Locate the api-paste config file and load the WSGI application.

    :returns: the paste-deploy WSGI application.
    :raises cfg.ConfigFilesNotFoundError: if the configured paste file
        cannot be located.
    """
    cfg_path = cfg.CONF.api.api_paste_config
    if os.path.isabs(cfg_path):
        # Absolute path: use it only if it actually exists.
        cfg_file = cfg_path if os.path.exists(cfg_path) else None
    else:
        # Relative path: search the standard config directories.
        cfg_file = CONF.find_file(cfg_path)
    if not cfg_file:
        raise cfg.ConfigFilesNotFoundError([cfg.CONF.api.api_paste_config])
    LOG.info(_LI("Full WSGI config used: %s"), cfg_file)
    return deploy.loadapp("config:" + cfg_file)
def _sync_existing_cluster(self, cluster, stack):
    """Copy the Heat stack's status onto the cluster when they diverge.

    :param cluster: cluster object to synchronize.
    :param stack: Heat stack object that is the source of truth.
    """
    # No-op when the cluster already mirrors the stack.
    if cluster.status == stack.stack_status:
        return
    previous_status = cluster.status
    cluster.status = stack.stack_status
    cluster.status_reason = stack.stack_status_reason
    cluster.save()
    LOG.info(
        _LI("Sync up cluster with id %(id)s from "
            "%(old_status)s to %(status)s."),
        {'id': cluster.id,
         'old_status': previous_status,
         'status': cluster.status})
def main():
    """Entry point for the magnum-api WSGI service."""
    service.prepare_service(sys.argv)
    gmr.TextGuruMeditation.setup_autorun(version)
    # Enable object backporting via the conductor
    base.MagnumObject.indirection_api = base.MagnumObjectIndirectionAPI()
    app = api_app.load_app()
    # Build the WSGI server, log how we are configured, then block
    # serving requests forever.
    host = cfg.CONF.api.host
    port = cfg.CONF.api.port
    srv = simple_server.make_server(host, port, app)
    LOG.info(_LI('Starting server in PID %s'), os.getpid())
    LOG.debug("Configuration:")
    cfg.CONF.log_opt_values(LOG, logging.DEBUG)
    LOG.info(_LI('serving on http://%(host)s:%(port)s'),
             {'host': host, 'port': port})
    srv.serve_forever()
def _sync_missing_stack(self, new_status):
    """Set the tracked cluster to *new_status* because its backing Heat
    stack could not be found, saving the change and logging it.

    :param new_status: terminal status to record on the cluster.
    """
    target = self.cluster
    target.status = new_status
    target.status_reason = _("Stack with id %s not found in "
                             "Heat.") % target.stack_id
    target.save()
    LOG.info(
        _LI("Cluster with id %(id)s has been set to "
            "%(status)s due to stack with id %(sid)s "
            "not found in Heat."),
        {'id': target.id,
         'status': target.status,
         'sid': target.stack_id})