def _send_cluster_metrics(self, ctx):
    """Collect metrics from each healthy cluster and emit a notification.

    Clusters not in CREATE_COMPLETE/UPDATE_COMPLETE are skipped, as are
    clusters without a monitor.  Per-cluster and per-metric failures are
    logged and skipped so one bad cluster cannot block the rest.
    """
    LOG.debug('Starting to send cluster metrics')
    for cluster in objects.Cluster.list(ctx):
        if cluster.status not in [
                fields.ClusterStatus.CREATE_COMPLETE,
                fields.ClusterStatus.UPDATE_COMPLETE]:
            continue
        monitor = monitors.create_monitor(ctx, cluster)
        if monitor is None:
            # no monitor implementation for this cluster's COE
            continue
        try:
            monitor.pull_data()
        except Exception as e:
            LOG.warning(_LW(
                "Skip pulling data from cluster %(cluster)s due to "
                "error: %(e)s"),
                {'e': e, 'cluster': cluster.uuid}, exc_info=True)
            continue
        metrics = list()
        for name in monitor.get_metric_names():
            try:
                metric = {
                    'name': name,
                    'value': monitor.compute_metric_value(name),
                    'unit': monitor.get_metric_unit(name),
                }
                metrics.append(metric)
            except Exception as e:
                LOG.warning(_LW("Skip adding metric %(name)s due to "
                                "error: %(e)s"),
                            {'e': e, 'name': name}, exc_info=True)
        message = dict(metrics=metrics,
                       user_id=cluster.user_id,
                       project_id=cluster.project_id,
                       resource_id=cluster.uuid)
        LOG.debug("About to send notification: '%s'", message)
        self.notifier.info(ctx, "magnum.cluster.metrics.update", message)
def sync_cluster_status(self, ctx):
    """Sync status of IN_PROGRESS clusters with the orchestration layer.

    Any error is logged and swallowed so the periodic task keeps running.
    """
    try:
        LOG.debug('Starting to sync up cluster status')
        # get all the clusters that are IN_PROGRESS
        status = [objects.fields.ClusterStatus.CREATE_IN_PROGRESS,
                  objects.fields.ClusterStatus.UPDATE_IN_PROGRESS,
                  objects.fields.ClusterStatus.DELETE_IN_PROGRESS,
                  objects.fields.ClusterStatus.ROLLBACK_IN_PROGRESS]
        filters = {'status': status}
        clusters = objects.Cluster.list(ctx, filters=filters)
        if not clusters:
            return
        # synchronize with underlying orchestration
        for cluster in clusters:
            job = ClusterUpdateJob(ctx, cluster)
            # though this call isn't really looping, we use this
            # abstraction anyway to avoid dealing directly with eventlet
            # hooey
            lc = loopingcall.FixedIntervalLoopingCall(f=job.update_status)
            lc.start(1, stop_on_exception=True)
    except Exception as e:
        LOG.warning(_LW(
            "Ignore error [%s] when syncing up cluster status."),
            e, exc_info=True)
def does_cluster_not_exist(self, cluster_id):
    """Return True when *cluster_id* does not resolve to a cluster."""
    try:
        self.get_cluster(cluster_id)
        return False
    except exceptions.NotFound:
        self.LOG.warning(_LW('Cluster %s is not found.'), cluster_id)
        return True
def sync_cluster_status(self, ctx):
    """Sync status of IN_PROGRESS clusters with the orchestration layer.

    Errors are logged and swallowed so the periodic task keeps running.
    """
    try:
        LOG.debug('Starting to sync up cluster status')
        # get all the clusters that are IN_PROGRESS
        status = [
            objects.fields.ClusterStatus.CREATE_IN_PROGRESS,
            objects.fields.ClusterStatus.UPDATE_IN_PROGRESS,
            objects.fields.ClusterStatus.DELETE_IN_PROGRESS,
            objects.fields.ClusterStatus.ROLLBACK_IN_PROGRESS
        ]
        filters = {'status': status}
        clusters = objects.Cluster.list(ctx, filters=filters)
        if not clusters:
            return
        # synchronize with underlying orchestration
        for cluster in clusters:
            job = ClusterUpdateJob(ctx, cluster)
            # though this call isn't really looping, we use this
            # abstraction anyway to avoid dealing directly with eventlet
            # hooey
            lc = loopingcall.FixedIntervalLoopingCall(f=job.update_status)
            lc.start(1, stop_on_exception=True)
    except Exception as e:
        LOG.warning(
            _LW("Ignore error [%s] when syncing up cluster status."),
            e, exc_info=True)
def delete_cert(cert_ref, **kwargs):
    """Deletes the specified cert.

    :param cert_ref: the UUID of the cert to delete
    :raises CertificateStorageException: if certificate deletion fails
    """
    # LOG.warn is a deprecated alias of LOG.warning.
    LOG.warning(
        _LW("Deleting certificate {0} from the local filesystem. "
            "CertManager type 'local' should be used for testing purpose."
            ).format(cert_ref))
    filename_base = os.path.join(CONF.certificates.storage_path, cert_ref)
    filename_certificate = "{0}.crt".format(filename_base)
    filename_private_key = "{0}.key".format(filename_base)
    filename_intermediates = "{0}.int".format(filename_base)
    filename_pkp = "{0}.pass".format(filename_base)
    try:
        os.remove(filename_certificate)
        os.remove(filename_private_key)
        # intermediates and passphrase are optional artifacts, so they are
        # removed only when present
        if path.isfile(filename_intermediates):
            os.remove(filename_intermediates)
        if path.isfile(filename_pkp):
            os.remove(filename_pkp)
    except IOError as ioe:
        LOG.error(
            _LE("Failed to delete certificate {0}.").format(cert_ref))
        # NOTE(review): IOError.message does not exist on Python 3 --
        # consider str(ioe); left unchanged to avoid altering the payload.
        raise exception.CertificateStorageException(msg=ioe.message)
def get_output_value(self, stack):
    """Return the value of ``self.heat_output`` from *stack*'s outputs.

    Returns None (and logs a warning) when the key is absent.
    """
    outputs = stack.to_dict().get('outputs', [])
    for entry in outputs:
        if entry['output_key'] == self.heat_output:
            return entry['output_value']
    LOG.warning(_LW('stack does not have output_key %s'), self.heat_output)
    return None
def does_bay_not_exist(self, bay_id):
    """Return True when no bay with *bay_id* exists."""
    exists = True
    try:
        self.get_bay(bay_id)
    except exceptions.NotFound:
        self.LOG.warning(_LW('Bay %s is not found.'), bay_id)
        exists = False
    return not exists
def does_bay_not_exist(self, bay_id):
    """Return True if the bay cannot be found, False otherwise."""
    try:
        self.get_bay(bay_id)
    except exceptions.NotFound:
        # Pass bay_id as a lazy logging argument instead of eager
        # %-interpolation, so formatting only happens when emitted.
        self.LOG.warning(_LW('Bay %s is not found.'), bay_id)
        return True
    return False
def _send_bay_metrics(self, ctx):
    """Pull memory utilization for each bay and emit a notification.

    Bays whose data pull fails are logged and skipped.
    """
    LOG.debug('Starting to send bay metrics')
    for bay in objects.Bay.list(ctx):
        data = None
        try:
            # NOTE(review): `monitor` is not defined in this function --
            # assumed to be a module-level helper; verify at module scope.
            data = monitor.pull_data(list(bay.node_addresses))
        except Exception as e:
            # warn() is a deprecated alias of warning().
            LOG.warning(_LW("Skip pulling data from bay %(bay)s due to "
                            "error: %(e)s"),
                        {'e': e, 'bay': bay.uuid}, exc_info=True)
            continue
        metrics = []
        metric = {
            'name': 'memory_util',
            'unit': '%',
            'value': data
        }
        metrics.append(metric)
        message = dict(metrics=metrics,
                       user_id=bay.user_id,
                       project_id=bay.project_id,
                       resource_id=bay.uuid)
        # Let the logging layer interpolate lazily.
        LOG.debug("About to send notification: '%s'", message)
        rpc.get_notifier().info(ctx, "magnum.bay.metrics.update", message)
def sync_bay_status(self, ctx):
    """Reconcile IN_PROGRESS bay statuses with their Heat stacks."""
    try:
        LOG.debug('Starting to sync up bay status')
        osc = clients.OpenStackClients(ctx)
        status = [bay_status.CREATE_IN_PROGRESS,
                  bay_status.UPDATE_IN_PROGRESS,
                  bay_status.DELETE_IN_PROGRESS]
        filters = {'status': status}
        bays = objects.Bay.list(ctx, filters=filters)
        if not bays:
            return
        sid_to_bay_mapping = {bay.stack_id: bay for bay in bays}
        bay_stack_ids = sid_to_bay_mapping.keys()
        stacks = osc.heat().stacks.list(global_tenant=True,
                                        filters={'id': bay_stack_ids})
        sid_to_stack_mapping = {s.id: s for s in stacks}
        # intersection of bays magnum has and heat has
        for sid in (six.viewkeys(sid_to_bay_mapping) &
                    six.viewkeys(sid_to_stack_mapping)):
            stack = sid_to_stack_mapping[sid]
            bay = sid_to_bay_mapping[sid]
            self._sync_existing_bay(bay, stack)
        # the stacks that magnum has but heat doesn't have
        for sid in (six.viewkeys(sid_to_bay_mapping) -
                    six.viewkeys(sid_to_stack_mapping)):
            bay = sid_to_bay_mapping[sid]
            self._sync_missing_heat_stack(bay)
    except Exception as e:
        # warn() is a deprecated alias of warning().
        LOG.warning(_LW("Ignore error [%s] when syncing up bay status."),
                    e, exc_info=True)
def _collect_fault_info(self, context, cluster):
    """Collect fault info from heat resources of given cluster
    and store them into cluster.faults.
    """
    osc = clients.OpenStackClients(context)
    filters = {'status': 'FAILED'}
    try:
        failed_resources = osc.heat().resources.list(
            cluster.stack_id, nested_depth=2, filters=filters)
    except Exception as e:
        # Best-effort: on any Heat error return an empty fault map
        # instead of failing the caller.
        failed_resources = []
        LOG.warning(_LW("Failed to retrieve failed resources for "
                        "cluster %(cluster)s from Heat stack "
                        "%(stack)s due to error: %(e)s"),
                    {'cluster': cluster.uuid,
                     'stack': cluster.stack_id, 'e': e},
                    exc_info=True)
    # Map each failed resource name to its status reason.
    return {res.resource_name: res.resource_status_reason
            for res in failed_resources}
def get_removal_nodes(self, hosts_output):
    """Return the hosts to remove on scale-down, preferring hosts that
    run no containers.

    :param hosts_output: accessor for the stack output listing hosts.
    :raises exception.MagnumException: if the stack lacks the expected
        output key.
    """
    if not self._is_scale_down():
        return list()
    cluster = self.new_cluster
    stack = self.osclient.heat().stacks.get(cluster.stack_id)
    hosts = hosts_output.get_output_value(stack)
    if hosts is None:
        raise exception.MagnumException(_(
            "Output key '%(output_key)s' is missing from stack "
            "%(stack_id)s") % {'output_key': hosts_output.heat_output,
                               'stack_id': stack.id})
    hosts_with_container = self._get_hosts_with_container(self.context,
                                                          cluster)
    hosts_no_container = list(set(hosts) - hosts_with_container)
    LOG.debug('List of hosts that has no container: %s',
              str(hosts_no_container))
    num_of_removal = self._get_num_of_removal()
    if len(hosts_no_container) < num_of_removal:
        LOG.warning(_LW(
            "About to remove %(num_removal)d nodes, which is larger than "
            "the number of empty nodes (%(num_empty)d). %(num_non_empty)d "
            "non-empty nodes will be removed."), {
                'num_removal': num_of_removal,
                'num_empty': len(hosts_no_container),
                'num_non_empty': num_of_removal - len(hosts_no_container)})
    # NOTE(review): only empty hosts are sliced here, so fewer than
    # num_of_removal hosts may be returned despite the warning above --
    # confirm intended behavior with the caller.
    hosts_to_remove = hosts_no_container[0:num_of_removal]
    LOG.info(_LI('Require removal of hosts: %s'), hosts_to_remove)
    return hosts_to_remove
def sync_bay_status(self, ctx):
    """Reconcile IN_PROGRESS bay statuses with their Heat stacks.

    Errors are logged and swallowed so the periodic task keeps running.
    """
    try:
        LOG.debug('Starting to sync up bay status')
        osc = clients.OpenStackClients(ctx)
        status = [bay_status.CREATE_IN_PROGRESS,
                  bay_status.UPDATE_IN_PROGRESS,
                  bay_status.DELETE_IN_PROGRESS]
        filters = {'status': status}
        bays = objects.Bay.list(ctx, filters=filters)
        if not bays:
            return
        sid_to_bay_mapping = {bay.stack_id: bay for bay in bays}
        bay_stack_ids = sid_to_bay_mapping.keys()
        stacks = osc.heat().stacks.list(global_tenant=True,
                                        filters={'id': bay_stack_ids})
        sid_to_stack_mapping = {s.id: s for s in stacks}
        # intersection of bays magnum has and heat has
        for sid in (six.viewkeys(sid_to_bay_mapping) &
                    six.viewkeys(sid_to_stack_mapping)):
            stack = sid_to_stack_mapping[sid]
            bay = sid_to_bay_mapping[sid]
            self._sync_existing_bay(bay, stack)
        # the stacks that magnum has but heat doesn't have
        for sid in (six.viewkeys(sid_to_bay_mapping) -
                    six.viewkeys(sid_to_stack_mapping)):
            bay = sid_to_bay_mapping[sid]
            self._sync_missing_heat_stack(bay)
    except Exception as e:
        LOG.warning(_LW(
            "Ignore error [%s] when syncing up bay status."),
            e, exc_info=True)
def _get_legacy_auth(self):
    """Build a v3 Password auth plugin from deprecated legacy options.

    :returns: a ka_v3.Password plugin populated from the legacy section.
    """
    # Pass the substitution dict as a lazy logging argument instead of
    # eager %-interpolation; formatting happens only when emitted.
    LOG.warning(
        _LW('Auth plugin and its options for service user '
            'must be provided in [%(new)s] section. '
            'Using values from [%(old)s] section is '
            'deprecated.'),
        {'new': ksconf.CFG_GROUP, 'old': ksconf.CFG_LEGACY_GROUP})
    conf = getattr(CONF, ksconf.CFG_LEGACY_GROUP)
    # FIXME(htruta, pauloewerton): Conductor layer does not have
    # new v3 variables, such as project_name and project_domain_id.
    # The use of admin_* variables is related to Identity API v2.0,
    # which is now deprecated. We should also stop using hard-coded
    # domain info, as well as variables that refer to `tenant`,
    # as they are also v2 related.
    auth = ka_v3.Password(auth_url=self.auth_url,
                          username=conf.admin_user,
                          password=conf.admin_password,
                          project_name=conf.admin_tenant_name,
                          project_domain_id='default',
                          user_domain_id='default')
    return auth
def _get_bay_stacks(self, bays, sid_to_bay_mapping, bay_stack_ids):
    """Fetch each bay's Heat stack using the bay's own trustee context,
    pruning bays whose stack lookup failed unexpectedly.

    :returns: [stacks, bays, bay_stack_ids, sid_to_bay_mapping] with
        failed bays removed from the last three collections.
    """
    stacks = []
    _bays = bays
    _sid_to_bay_mapping = sid_to_bay_mapping
    _bay_stack_ids = bay_stack_ids
    # NOTE(review): _bays is removed from while being iterated below,
    # which can skip the element after a removal -- confirm intent.
    for bay in _bays:
        try:
            # Create client with bay's trustee user context
            bosc = clients.OpenStackClients(
                context.make_bay_context(bay))
            stack = bosc.heat().stacks.get(bay.stack_id)
            stacks.append(stack)
        # No need to do anything in this case
        except heat_exc.HTTPNotFound:
            pass
        except Exception as e:
            # Any other exception means we do not perform any
            # action on this bay in the current sync run, so remove
            # it from all records.
            LOG.warning(_LW("Exception while attempting to retrieve "
                            "Heat stack %(stack_id)s for bay %(bay_id)s. "
                            "Traceback follows."),
                        {'stack_id': bay.stack_id, 'bay_id': bay.id})
            LOG.warning(e)
            _sid_to_bay_mapping.pop(bay.stack_id)
            _bay_stack_ids.remove(bay.stack_id)
            _bays.remove(bay)
    return [stacks, _bays, _bay_stack_ids, _sid_to_bay_mapping]
def delete_cert(cert_ref, **kwargs):
    """Deletes the specified cert.

    :param cert_ref: the UUID of the cert to delete
    :raises CertificateStorageException: if certificate deletion fails
    """
    LOG.warning(_LW(
        "Deleting certificate {0} from the local filesystem. "
        "CertManager type 'local' should be used for testing purpose."
    ).format(cert_ref))
    filename_base = os.path.join(CONF.certificates.storage_path, cert_ref)
    filename_certificate = "{0}.crt".format(filename_base)
    filename_private_key = "{0}.key".format(filename_base)
    filename_intermediates = "{0}.int".format(filename_base)
    filename_pkp = "{0}.pass".format(filename_base)
    try:
        os.remove(filename_certificate)
        os.remove(filename_private_key)
        # optional artifacts are removed only when present
        if path.isfile(filename_intermediates):
            os.remove(filename_intermediates)
        if path.isfile(filename_pkp):
            os.remove(filename_pkp)
    except IOError as ioe:
        LOG.error(_LE(
            "Failed to delete certificate {0}."
        ).format(cert_ref))
        # NOTE(review): IOError.message is Python 2 only -- confirm
        # Python 3 compatibility (str(ioe)).
        raise exception.CertificateStorageException(msg=ioe.message)
def rmtree_without_raise(path):
    """Recursively delete *path* if it is a directory; OSError is
    logged rather than raised."""
    if not os.path.isdir(path):
        return
    try:
        shutil.rmtree(path)
    except OSError as e:
        LOG.warning(_LW("Failed to remove dir %(path)s, error: %(e)s"),
                    {'path': path, 'e': e})
def get_output_value(self, stack):
    """Look up ``self.heat_output`` among *stack*'s outputs.

    :returns: the matching output value, or None (with a warning logged)
        when the stack has no such output key.
    """
    stack_dict = stack.to_dict()
    for item in stack_dict.get("outputs", []):
        if item["output_key"] == self.heat_output:
            return item["output_value"]
    LOG.warning(_LW("stack does not have output_key %s"), self.heat_output)
    return None
def _get_cluster_stacks(
        self, clusters, sid_to_cluster_mapping, cluster_stack_ids):
    """Fetch each cluster's Heat stack using the cluster's own trustee
    context, pruning clusters whose stack lookup failed unexpectedly.

    :returns: [stacks, clusters, cluster_stack_ids,
        sid_to_cluster_mapping] with failed clusters removed from the
        last three collections.
    """
    stacks = []
    _clusters = clusters
    _sid_to_cluster_mapping = sid_to_cluster_mapping
    _cluster_stack_ids = cluster_stack_ids
    # NOTE(review): _clusters is removed from while being iterated,
    # which can skip the element after a removal -- confirm intent.
    for cluster in _clusters:
        try:
            # Create client with cluster's trustee user context
            bosc = clients.OpenStackClients(
                context.make_cluster_context(cluster))
            stack = bosc.heat().stacks.get(cluster.stack_id)
            stacks.append(stack)
        # No need to do anything in this case
        except heat_exc.HTTPNotFound:
            pass
        except Exception as e:
            # Any other exception means we do not perform any
            # action on this cluster in the current sync run, so remove
            # it from all records.
            LOG.warning(
                _LW("Exception while attempting to retrieve "
                    "Heat stack %(stack_id)s for cluster %(cluster_id)s. "
                    "Traceback follows."),
                {'stack_id': cluster.stack_id, 'cluster_id': cluster.id})
            LOG.warning(e)
            _sid_to_cluster_mapping.pop(cluster.stack_id)
            _cluster_stack_ids.remove(cluster.stack_id)
            _clusters.remove(cluster)
    return [stacks, _clusters, _cluster_stack_ids, _sid_to_cluster_mapping]
def rmtree_without_raise(path):
    """Best-effort recursive delete of directory *path*.

    OSError is logged (not raised); non-directories are ignored.
    """
    try:
        if os.path.isdir(path):
            shutil.rmtree(path)
    except OSError as e:
        # warn() is a deprecated alias of warning().
        LOG.warning(_LW("Failed to remove dir %(path)s, error: %(e)s"),
                    {'path': path, 'e': e})
def _get_cluster_stacks(self, clusters, sid_to_cluster_mapping,
                        cluster_stack_ids):
    """Fetch each cluster's Heat stack via its trustee context, pruning
    clusters whose stack lookup failed unexpectedly.

    :returns: [stacks, clusters, cluster_stack_ids,
        sid_to_cluster_mapping] with failed clusters removed from the
        last three collections.
    """
    stacks = []
    _clusters = clusters
    _sid_to_cluster_mapping = sid_to_cluster_mapping
    _cluster_stack_ids = cluster_stack_ids
    # NOTE(review): _clusters is mutated while iterated (remove in the
    # except branch), which can skip the next element -- confirm intent.
    for cluster in _clusters:
        try:
            # Create client with cluster's trustee user context
            bosc = clients.OpenStackClients(
                context.make_cluster_context(cluster))
            stack = bosc.heat().stacks.get(cluster.stack_id)
            stacks.append(stack)
        # No need to do anything in this case
        except heat_exc.HTTPNotFound:
            pass
        except Exception as e:
            # Any other exception means we do not perform any
            # action on this cluster in the current sync run, so remove
            # it from all records.
            LOG.warning(
                _LW("Exception while attempting to retrieve "
                    "Heat stack %(stack_id)s for cluster %(cluster_id)s. "
                    "Traceback follows."),
                {'stack_id': cluster.stack_id,
                 'cluster_id': cluster.id})
            LOG.warning(e)
            _sid_to_cluster_mapping.pop(cluster.stack_id)
            _cluster_stack_ids.remove(cluster.stack_id)
            _clusters.remove(cluster)
    return [stacks, _clusters, _cluster_stack_ids, _sid_to_cluster_mapping]
def get_cert(cert_ref, **kwargs):
    """Retrieves the specified cert.

    :param cert_ref: the UUID of the cert to retrieve
    :return: magnum.common.cert_manager.cert_manager.Cert
        representation of the certificate data
    :raises CertificateStorageException: if certificate retrieval fails
    """
    # warn() is a deprecated alias of warning().
    LOG.warning(_LW(
        "Loading certificate {0} from the local filesystem. "
        "CertManager type 'local' should be used for testing purpose."
    ).format(cert_ref))
    filename_base = os.path.join(CONF.certificates.storage_path, cert_ref)
    filename_certificate = "{0}.crt".format(filename_base)
    filename_private_key = "{0}.key".format(filename_base)
    filename_intermediates = "{0}.int".format(filename_base)
    filename_pkp = "{0}.pass".format(filename_base)
    cert_data = dict()
    try:
        with open(filename_certificate, 'r') as cert_file:
            cert_data['certificate'] = cert_file.read()
    except IOError:
        LOG.error(_LE(
            "Failed to read certificate for {0}."
        ).format(cert_ref))
        raise exception.CertificateStorageException(
            msg=_("Certificate could not be read.")
        )
    try:
        with open(filename_private_key, 'r') as key_file:
            cert_data['private_key'] = key_file.read()
    except IOError:
        LOG.error(_LE(
            "Failed to read private key for {0}."
        ).format(cert_ref))
        raise exception.CertificateStorageException(
            msg=_("Private Key could not be read.")
        )
    try:
        with open(filename_intermediates, 'r') as int_file:
            cert_data['intermediates'] = int_file.read()
    except IOError as ioe:
        LOG.error(_LE("Failed to read certificate."))
        # NOTE(review): IOError.message is Python 2 only.
        raise exception.CertificateStorageException(msg=ioe.message)
    try:
        with open(filename_pkp, 'r') as pass_file:
            cert_data['private_key_passphrase'] = pass_file.read()
    except IOError as ioe:
        LOG.error(_LE("Failed to read certificate."))
        raise exception.CertificateStorageException(msg=ioe.message)
    return Cert(**cert_data)
def sync_bay_status(self, ctx):
    """Reconcile IN_PROGRESS bay statuses against their Heat stacks.

    Bays whose stack progressed are updated; bays whose stack vanished
    from Heat are deleted or marked failed depending on their phase.
    """
    try:
        LOG.debug('Starting to sync up bay status')
        osc = clients.OpenStackClients(ctx)
        filters = [bay_status.CREATE_IN_PROGRESS,
                   bay_status.UPDATE_IN_PROGRESS,
                   bay_status.DELETE_IN_PROGRESS]
        bays = objects.Bay.list_all(ctx, filters=filters)
        if not bays:
            return
        sid_to_bay_mapping = {bay.stack_id: bay for bay in bays}
        bay_stack_ids = sid_to_bay_mapping.keys()
        stacks = osc.heat().stacks.list(global_tenant=True,
                                        filters={'id': bay_stack_ids})
        sid_to_stack_mapping = {s.id: s for s in stacks}
        # bays known both to magnum and heat: follow the stack status
        for sid in (six.viewkeys(sid_to_bay_mapping) &
                    six.viewkeys(sid_to_stack_mapping)):
            stack = sid_to_stack_mapping[sid]
            bay = sid_to_bay_mapping[sid]
            if bay.status != stack.stack_status:
                old_status = bay.status
                bay.status = stack.stack_status
                bay.save()
                LOG.info(_LI("Sync up bay with id %(id)s from "
                             "%(old_status)s to %(status)s."),
                         {'id': bay.id, 'old_status': old_status,
                          'status': bay.status})
        # bays magnum has but heat doesn't: resolve by phase
        for sid in (six.viewkeys(sid_to_bay_mapping) -
                    six.viewkeys(sid_to_stack_mapping)):
            bay = sid_to_bay_mapping[sid]
            if bay.status == bay_status.DELETE_IN_PROGRESS:
                bay.destroy()
                LOG.info(_LI("Bay with id %(id)s has been deleted due "
                             "to stack with id %(sid)s not found in "
                             "Heat."),
                         {'id': bay.id, 'sid': sid})
            elif bay.status == bay_status.CREATE_IN_PROGRESS:
                bay.status = bay_status.CREATE_FAILED
                bay.save()
                LOG.info(_LI("Bay with id %(id)s has been set to "
                             "%(status)s due to stack with id %(sid)s "
                             "not found in Heat."),
                         {'id': bay.id, 'status': bay.status,
                          'sid': sid})
            elif bay.status == bay_status.UPDATE_IN_PROGRESS:
                bay.status = bay_status.UPDATE_FAILED
                bay.save()
                LOG.info(_LI("Bay with id %(id)s has been set to "
                             "%(status)s due to stack with id %(sid)s "
                             "not found in Heat."),
                         {'id': bay.id, 'status': bay.status,
                          'sid': sid})
    except Exception as e:
        # warn() is a deprecated alias of warning().
        LOG.warning(_LW("Ignore error [%s] when syncing up bay status."),
                    e, exc_info=True)
def _send_bay_metrics(self, ctx):
    """Collect metrics from each healthy bay and emit a notification.

    Bays not in CREATE_COMPLETE/UPDATE_COMPLETE, or without a monitor,
    are skipped; per-bay and per-metric failures are logged and skipped.
    """
    LOG.debug('Starting to send bay metrics')
    for bay in objects.Bay.list(ctx):
        if bay.status not in [bay_status.CREATE_COMPLETE,
                              bay_status.UPDATE_COMPLETE]:
            continue
        monitor = monitors.create_monitor(ctx, bay)
        if monitor is None:
            continue
        try:
            monitor.pull_data()
        except Exception as e:
            # warn() is a deprecated alias of warning().
            LOG.warning(_LW("Skip pulling data from bay %(bay)s due to "
                            "error: %(e)s"),
                        {'e': e, 'bay': bay.uuid}, exc_info=True)
            continue
        metrics = list()
        for name in monitor.get_metric_names():
            try:
                metric = {
                    'name': name,
                    'value': monitor.compute_metric_value(name),
                    'unit': monitor.get_metric_unit(name),
                }
                metrics.append(metric)
            except Exception as e:
                LOG.warning(_LW("Skip adding metric %(name)s due to "
                                "error: %(e)s"),
                            {'e': e, 'name': name}, exc_info=True)
        message = dict(metrics=metrics,
                       user_id=bay.user_id,
                       project_id=bay.project_id,
                       resource_id=bay.uuid)
        # Lazy interpolation instead of eager %-formatting.
        LOG.debug("About to send notification: '%s'", message)
        self.notifier.info(ctx, "magnum.bay.metrics.update", message)
def unlink_without_raise(path):
    """Delete *path*; a missing file is ignored, other OSErrors are
    logged rather than raised."""
    try:
        os.unlink(path)
    except OSError as e:
        if e.errno != errno.ENOENT:
            LOG.warning(_LW("Failed to unlink %(path)s, error: %(e)s"),
                        {'path': path, 'e': e})
def get_scale_manager(context, osclient, cluster):
    """Return the driver's scale manager for *cluster*, or None.

    A warning is logged when the driver provides no scale manager.
    """
    driver = Driver.get_driver_for_cluster(context, cluster)
    manager = driver.get_scale_manager(context, osclient, cluster)
    if not manager:
        LOG.warning(_LW(
            "Currently only kubernetes and mesos cluster scale manager "
            "are available"))
    return manager
def get_scale_manager(context, osclient, cluster):
    """Look up the cluster's driver and return its scale manager.

    Logs a warning and still returns the (falsy) result when the driver
    has no scale manager.
    """
    cluster_driver = Driver.get_driver_for_cluster(context, cluster)
    scale_manager = cluster_driver.get_scale_manager(
        context, osclient, cluster)
    if not scale_manager:
        LOG.warning(
            _LW("Currently only kubernetes and mesos cluster scale manager "
                "are available"))
    return scale_manager
def unlink_without_raise(path):
    """Best-effort delete of *path*.

    A missing file (ENOENT) is silently ignored; any other OSError is
    logged rather than raised.
    """
    try:
        os.unlink(path)
    except OSError as e:
        if e.errno == errno.ENOENT:
            return
        else:
            # warn() is a deprecated alias of warning().
            LOG.warning(_LW("Failed to unlink %(path)s, error: %(e)s"),
                        {'path': path, 'e': e})
def create_link_without_raise(source, link):
    """Symlink *link* -> *source*; an existing link (EEXIST) is ignored,
    other OSErrors are logged rather than raised."""
    try:
        os.symlink(source, link)
    except OSError as e:
        if e.errno != errno.EEXIST:
            LOG.warning(_LW("Failed to create symlink from %(source)s to "
                            "%(link)s, error: %(e)s"),
                        {'source': source, 'link': link, 'e': e})
def release(self, bay_uuid):
    """Release a bay lock.

    Only the conductor that owns the lock will be releasing it.
    """
    result = objects.BayLock.release(bay_uuid, self.conductor_id)
    if result is True:
        # warn() is a deprecated alias of warning().
        LOG.warning(_LW("Lock was already released on bay %s!"), bay_uuid)
    else:
        # Let logging interpolate lazily instead of eager %-formatting.
        LOG.debug("Conductor %(conductor)s released lock on bay "
                  "%(bay)s", {'conductor': self.conductor_id,
                              'bay': bay_uuid})
def create_link_without_raise(source, link):
    """Best-effort symlink *link* -> *source*.

    An already-existing link (EEXIST) is silently ignored; any other
    OSError is logged rather than raised.
    """
    try:
        os.symlink(source, link)
    except OSError as e:
        if e.errno == errno.EEXIST:
            return
        else:
            # warn() is a deprecated alias of warning().
            LOG.warning(_LW("Failed to create symlink from %(source)s to "
                            "%(link)s, error: %(e)s"),
                        {'source': source, 'link': link, 'e': e})
def _send_cluster_metrics(self, ctx):
    """Collect metrics from each healthy cluster and emit a notification.

    Clusters not in CREATE_COMPLETE/UPDATE_COMPLETE, or without a
    monitor, are skipped; per-cluster and per-metric failures are logged
    and skipped.
    """
    LOG.debug('Starting to send cluster metrics')
    for cluster in objects.Cluster.list(ctx):
        if cluster.status not in (
                objects.fields.ClusterStatus.CREATE_COMPLETE,
                objects.fields.ClusterStatus.UPDATE_COMPLETE):
            continue
        monitor = monitors.create_monitor(ctx, cluster)
        if monitor is None:
            continue
        try:
            monitor.pull_data()
        except Exception as e:
            LOG.warning(
                _LW("Skip pulling data from cluster %(cluster)s due to "
                    "error: %(e)s"),
                {'e': e, 'cluster': cluster.uuid}, exc_info=True)
            continue
        metrics = list()
        for name in monitor.get_metric_names():
            try:
                metric = {
                    'name': name,
                    'value': monitor.compute_metric_value(name),
                    'unit': monitor.get_metric_unit(name),
                }
                metrics.append(metric)
            except Exception as e:
                LOG.warning(_LW("Skip adding metric %(name)s due to "
                                "error: %(e)s"),
                            {'e': e, 'name': name}, exc_info=True)
        message = dict(metrics=metrics,
                       user_id=cluster.user_id,
                       project_id=cluster.project_id,
                       resource_id=cluster.uuid)
        LOG.debug("About to send notification: '%s'", message)
        self.notifier.info(ctx, "magnum.cluster.metrics.update", message)
def delete_certificates_from_bay(bay):
    """Delete ca cert and magnum client cert from bay

    :param bay: The bay which has certs
    """
    for cert_ref in [bay.ca_cert_ref, bay.magnum_cert_ref]:
        try:
            if cert_ref:
                cert_manager.get_backend().CertManager.delete_cert(cert_ref)
        except Exception:
            # warning() with lazy args replaces deprecated warn() and
            # eager %-interpolation.
            LOG.warning(_LW("Deleting cert is failed: %s"), cert_ref)
def release(self, bay_uuid):
    """Release a bay lock.

    Only the conductor that owns the lock will be releasing it.
    """
    result = objects.BayLock.release(bay_uuid, self.conductor_id)
    if result is True:
        # warn() is a deprecated alias of warning().
        LOG.warning(_LW("Lock was already released on bay %s!"), bay_uuid)
    else:
        # Lazy interpolation instead of eager %-formatting.
        LOG.debug("Conductor %(conductor)s released lock on bay "
                  "%(bay)s",
                  {'conductor': self.conductor_id, 'bay': bay_uuid})
def is_cluster_in_final_state(self, cluster_id):
    """Return True when the cluster has reached a terminal status."""
    try:
        resp, model = self.get_cluster(cluster_id)
        final = model.status in ['CREATED', 'CREATE_COMPLETE',
                                 'ERROR', 'CREATE_FAILED']
        if final:
            self.LOG.info(_LI('Cluster %s succeeded.'), cluster_id)
            return True
        return False
    except exceptions.NotFound:
        self.LOG.warning(_LW('Cluster %s is not found.'), cluster_id)
        return False
def is_cluster_in_final_state(self, cluster_id):
    """Return True when the cluster has reached a terminal status."""
    try:
        resp, model = self.get_cluster(cluster_id)
        if model.status in ['CREATED', 'CREATE_COMPLETE',
                            'ERROR', 'CREATE_FAILED']:
            # Lazy logging arguments instead of eager %-interpolation.
            self.LOG.info(_LI('Cluster %s succeeded.'), cluster_id)
            return True
        else:
            return False
    except exceptions.NotFound:
        self.LOG.warning(_LW('Cluster %s is not found.'), cluster_id)
        return False
def _send_bay_metrics(self, ctx):
    """Collect metrics from each healthy bay and emit a notification.

    Bays not in CREATE_COMPLETE/UPDATE_COMPLETE, or without a monitor,
    are skipped; per-bay and per-metric failures are logged and skipped.
    """
    LOG.debug('Starting to send bay metrics')
    for bay in objects.Bay.list(ctx):
        if bay.status not in [bay_status.CREATE_COMPLETE,
                              bay_status.UPDATE_COMPLETE]:
            continue
        monitor = monitors.create_monitor(ctx, bay)
        if monitor is None:
            continue
        try:
            monitor.pull_data()
        except Exception as e:
            # warn() is deprecated; use warning().
            LOG.warning(_LW("Skip pulling data from bay %(bay)s due to "
                            "error: %(e)s"),
                        {'e': e, 'bay': bay.uuid}, exc_info=True)
            continue
        metrics = list()
        for name in monitor.get_metric_names():
            try:
                metric = {
                    'name': name,
                    'value': monitor.compute_metric_value(name),
                    'unit': monitor.get_metric_unit(name),
                }
                metrics.append(metric)
            except Exception as e:
                LOG.warning(_LW("Skip adding metric %(name)s due to "
                                "error: %(e)s"),
                            {'e': e, 'name': name}, exc_info=True)
        message = dict(metrics=metrics,
                       user_id=bay.user_id,
                       project_id=bay.project_id,
                       resource_id=bay.uuid)
        # Defer interpolation to the logging subsystem.
        LOG.debug("About to send notification: '%s'", message)
        self.notifier.info(ctx, "magnum.bay.metrics.update", message)
def delete_certificates_from_bay(bay, context=None):
    """Delete ca cert and magnum client cert from bay

    :param bay: The bay which has certs
    """
    for attr_name in ('ca_cert_ref', 'magnum_cert_ref'):
        try:
            ref = getattr(bay, attr_name, None)
            if ref:
                cert_manager.get_backend().CertManager.delete_cert(
                    ref, resource_ref=bay.uuid, context=context)
        except Exception:
            LOG.warning(_LW("Deleting certs is failed for Bay %s"),
                        bay.uuid)
def get_scale_manager(context, osclient, cluster):
    """Return the scale manager matching the cluster's COE, or None."""
    coe = cluster.cluster_template.coe
    manager_classes = {'kubernetes': K8sScaleManager,
                       'mesos': MesosScaleManager}
    manager_cls = manager_classes.get(coe)
    if manager_cls is None:
        LOG.warning(
            _LW("Currently only kubernetes and mesos cluster scale manager "
                "are available"))
        return None
    return manager_cls(context, osclient, cluster)
def get_scale_manager(context, osclient, cluster):
    """Build the COE-specific scale manager for *cluster*, or None."""
    coe = cluster.cluster_template.coe
    if coe == 'kubernetes':
        return K8sScaleManager(context, osclient, cluster)
    if coe == 'mesos':
        return MesosScaleManager(context, osclient, cluster)
    LOG.warning(_LW(
        "Currently only kubernetes and mesos cluster scale manager "
        "are available"))
    return None
def magnum_url(self):
    """Resolve the magnum endpoint, falling back to the deprecated
    'container' service type when 'container-infra' is not registered."""
    endpoint_type = self._get_client_option('magnum', 'endpoint_type')
    region_name = self._get_client_option('magnum', 'region_name')
    try:
        return self.url_for(service_type='container-infra',
                            interface=endpoint_type,
                            region_name=region_name)
    except catalog.EndpointNotFound:
        pass
    url = self.url_for(service_type='container',
                       interface=endpoint_type,
                       region_name=region_name)
    LOG.warning(_LW('Service type "container" is deprecated and will '
                    'be removed in a subsequent release'))
    return url
def delete_certificates_from_cluster(cluster, context=None):
    """Delete ca cert and magnum client cert from cluster

    :param cluster: The cluster which has certs
    """
    for attr_name in ('ca_cert_ref', 'magnum_cert_ref'):
        try:
            ref = getattr(cluster, attr_name, None)
            if ref:
                cert_manager.get_backend().CertManager.delete_cert(
                    ref, resource_ref=cluster.uuid, context=context)
        except Exception:
            LOG.warning(_LW("Deleting certs is failed for Cluster %s"),
                        cluster.uuid)
def safe_rstrip(value, chars=None):
    """Removes trailing characters from a string if that does not make it empty

    :param value: A string value that will be stripped.
    :param chars: Characters to remove.
    :return: Stripped value.
    """
    if not isinstance(value, six.string_types):
        # warn() is a deprecated alias of warning(); keep lazy args.
        LOG.warning(_LW("Failed to remove trailing character. Returning "
                        "original object. Supplied object is not a string: "
                        "%s,"), value)
        return value
    # `or value` keeps the original when stripping would yield ""
    return value.rstrip(chars) or value
def sync_cluster_status(self, ctx):
    """Reconcile IN_PROGRESS cluster statuses with their Heat stacks.

    Uses a global stack list when configured, otherwise fetches each
    stack via the cluster's trustee context.  Errors are logged and
    swallowed so the periodic task keeps running.
    """
    try:
        LOG.debug('Starting to sync up cluster status')
        osc = clients.OpenStackClients(ctx)
        status = [
            fields.ClusterStatus.CREATE_IN_PROGRESS,
            fields.ClusterStatus.UPDATE_IN_PROGRESS,
            fields.ClusterStatus.DELETE_IN_PROGRESS,
            fields.ClusterStatus.ROLLBACK_IN_PROGRESS
        ]
        filters = {'status': status}
        clusters = objects.Cluster.list(ctx, filters=filters)
        if not clusters:
            return
        sid_to_cluster_mapping = {
            cluster.stack_id: cluster for cluster in clusters
        }
        cluster_stack_ids = sid_to_cluster_mapping.keys()
        if CONF.periodic_global_stack_list:
            stacks = osc.heat().stacks.list(
                global_tenant=True,
                filters={'id': cluster_stack_ids})
        else:
            ret = self._get_cluster_stacks(clusters,
                                           sid_to_cluster_mapping,
                                           cluster_stack_ids)
            [stacks, clusters, cluster_stack_ids,
             sid_to_cluster_mapping] = ret
        sid_to_stack_mapping = {s.id: s for s in stacks}
        # intersection of clusters magnum has and heat has
        for sid in (six.viewkeys(sid_to_cluster_mapping) &
                    six.viewkeys(sid_to_stack_mapping)):
            stack = sid_to_stack_mapping[sid]
            cluster = sid_to_cluster_mapping[sid]
            self._sync_existing_cluster(cluster, stack)
        # the stacks that magnum has but heat doesn't have
        for sid in (six.viewkeys(sid_to_cluster_mapping) -
                    six.viewkeys(sid_to_stack_mapping)):
            cluster = sid_to_cluster_mapping[sid]
            self._sync_missing_heat_stack(cluster)
    except Exception as e:
        LOG.warning(
            _LW("Ignore error [%s] when syncing up cluster status."),
            e, exc_info=True)
def store_cert(certificate, private_key, intermediates=None,
               private_key_passphrase=None, **kwargs):
    """Stores (i.e., registers) a cert with the cert manager.

    This method stores the specified cert to the filesystem and returns
    a UUID that can be used to retrieve it.

    :param certificate: PEM encoded TLS certificate
    :param private_key: private key for the supplied certificate
    :param intermediates: ordered and concatenated intermediate certs
    :param private_key_passphrase: optional passphrase for the supplied key
    :returns: the UUID of the stored cert
    :raises CertificateStorageException: if certificate storage fails
    """
    cert_ref = str(uuid.uuid4())
    filename_base = os.path.join(CONF.certificates.storage_path, cert_ref)
    # warn() is a deprecated alias of warning().
    LOG.warning(
        _LW("Storing certificate data on the local filesystem. "
            "CertManager type 'local' should be used for testing purpose.")
    )
    try:
        filename_certificate = "{0}.crt".format(filename_base)
        with open(filename_certificate, 'w') as cert_file:
            cert_file.write(certificate)
        filename_private_key = "{0}.key".format(filename_base)
        with open(filename_private_key, 'w') as key_file:
            key_file.write(private_key)
        if intermediates:
            filename_intermediates = "{0}.int".format(filename_base)
            with open(filename_intermediates, 'w') as int_file:
                int_file.write(intermediates)
        if private_key_passphrase:
            filename_pkp = "{0}.pass".format(filename_base)
            with open(filename_pkp, 'w') as pass_file:
                pass_file.write(private_key_passphrase)
    except IOError as ioe:
        LOG.error(_LE("Failed to store certificate."))
        # NOTE(review): IOError.message is Python 2 only -- consider
        # str(ioe) when moving to Python 3.
        raise exception.CertificateStorageException(msg=ioe.message)
    return cert_ref
def does_cluster_exist(self, cluster_id):
    """Return True once the cluster is created; raise on failure states.

    :raises exceptions.ServerFault: when the cluster is in an error
        status.
    """
    try:
        resp, model = self.get_cluster(cluster_id)
        status = model.status
        if status in ['CREATED', 'CREATE_COMPLETE']:
            self.LOG.info(_LI('Cluster %s is created.'), cluster_id)
            return True
        if status in ['ERROR', 'CREATE_FAILED']:
            self.LOG.error(_LE('Cluster %s is in fail state.'),
                           cluster_id)
            raise exceptions.ServerFault(
                "Got into an error condition: %s for %s" %
                (status, cluster_id))
        return False
    except exceptions.NotFound:
        self.LOG.warning(_LW('Cluster %s is not found.'), cluster_id)
        return False
def does_bay_exist(self, bay_id):
    """Return True once the bay is created; raise on failure states.

    :raises exceptions.ServerFault: when the bay is in an error status.
    """
    try:
        resp, model = self.get_bay(bay_id)
        status = model.status
        if status in ['CREATED', 'CREATE_COMPLETE']:
            self.LOG.info(_LI('Bay %s is created.'), bay_id)
            return True
        if status in ['ERROR', 'CREATE_FAILED']:
            self.LOG.error(_LE('Bay %s is in fail state.'), bay_id)
            raise exceptions.ServerFault(
                "Got into an error condition: %s for %s" %
                (status, bay_id))
        return False
    except exceptions.NotFound:
        self.LOG.warning(_LW('Bay %s is not found.'), bay_id)
        return False
def store_cert(certificate, private_key, intermediates=None,
               private_key_passphrase=None, **kwargs):
    """Stores (i.e., registers) a cert with the cert manager.

    This method stores the specified cert to the filesystem and returns
    a UUID that can be used to retrieve it.

    :param certificate: PEM encoded TLS certificate
    :param private_key: private key for the supplied certificate
    :param intermediates: ordered and concatenated intermediate certs
    :param private_key_passphrase: optional passphrase for the supplied key
    :returns: the UUID of the stored cert
    :raises CertificateStorageException: if certificate storage fails
    """
    cert_ref = str(uuid.uuid4())
    filename_base = os.path.join(CONF.certificates.storage_path, cert_ref)
    LOG.warning(_LW(
        "Storing certificate data on the local filesystem. "
        "CertManager type 'local' should be used for testing purpose."
    ))
    try:
        filename_certificate = "{0}.crt".format(filename_base)
        with open(filename_certificate, 'w') as cert_file:
            cert_file.write(certificate)
        filename_private_key = "{0}.key".format(filename_base)
        with open(filename_private_key, 'w') as key_file:
            key_file.write(private_key)
        # intermediates and passphrase are optional artifacts
        if intermediates:
            filename_intermediates = "{0}.int".format(filename_base)
            with open(filename_intermediates, 'w') as int_file:
                int_file.write(intermediates)
        if private_key_passphrase:
            filename_pkp = "{0}.pass".format(filename_base)
            with open(filename_pkp, 'w') as pass_file:
                pass_file.write(private_key_passphrase)
    except IOError as ioe:
        LOG.error(_LE("Failed to store certificate."))
        # NOTE(review): IOError.message is Python 2 only -- confirm
        # Python 3 compatibility (str(ioe)).
        raise exception.CertificateStorageException(msg=ioe.message)
    return cert_ref
def pull_data(self):
    """Collect node and container data from the cluster's Docker endpoint.

    Populates ``self.data['nodes']`` from the daemon's system info and
    ``self.data['containers']`` with one record per container.
    """
    with docker_utils.docker_for_cluster(self.context,
                                         self.cluster) as docker:
        system_info = docker.info()
        self.data['nodes'] = self._parse_node_info(system_info)

        # Best-effort inspection of every container (running or not):
        # if inspect fails, the summary record from the listing is kept.
        collected = []
        for container in docker.containers(all=True):
            try:
                container = docker.inspect_container(container['Id'])
            except Exception as e:
                LOG.warning(_LW("Ignore error [%(e)s] when inspecting "
                                "container %(container_id)s."),
                            {'e': e, 'container_id': container['Id']},
                            exc_info=True)
            collected.append(container)
        self.data['containers'] = collected
def sync_cluster_status(self, ctx):
    """Reconcile in-progress clusters against their Heat stacks.

    Lists every cluster in an *_IN_PROGRESS state, fetches the matching
    Heat stacks, then updates clusters whose stacks exist and handles
    clusters whose stacks have disappeared from Heat. All errors are
    logged and swallowed so the periodic task keeps running.
    """
    try:
        LOG.debug('Starting to sync up cluster status')
        osc = clients.OpenStackClients(ctx)
        in_progress = [fields.ClusterStatus.CREATE_IN_PROGRESS,
                       fields.ClusterStatus.UPDATE_IN_PROGRESS,
                       fields.ClusterStatus.DELETE_IN_PROGRESS,
                       fields.ClusterStatus.ROLLBACK_IN_PROGRESS]
        clusters = objects.Cluster.list(ctx,
                                        filters={'status': in_progress})
        if not clusters:
            return

        sid_to_cluster_mapping = {c.stack_id: c for c in clusters}
        cluster_stack_ids = sid_to_cluster_mapping.keys()

        if CONF.periodic_global_stack_list:
            # A single admin-scoped listing covers all tenants' stacks.
            stacks = osc.heat().stacks.list(
                global_tenant=True, filters={'id': cluster_stack_ids})
        else:
            # Per-tenant listing; the helper may prune entries it could
            # not query, so adopt the refreshed collections it returns.
            (stacks, clusters, cluster_stack_ids,
             sid_to_cluster_mapping) = self._get_cluster_stacks(
                clusters, sid_to_cluster_mapping, cluster_stack_ids)

        sid_to_stack_mapping = {stack.id: stack for stack in stacks}
        magnum_sids = six.viewkeys(sid_to_cluster_mapping)
        heat_sids = six.viewkeys(sid_to_stack_mapping)

        # Stacks known to both magnum and heat: refresh from heat.
        for sid in magnum_sids & heat_sids:
            self._sync_existing_cluster(sid_to_cluster_mapping[sid],
                                        sid_to_stack_mapping[sid])

        # Stacks magnum tracks but heat no longer has.
        for sid in magnum_sids - heat_sids:
            self._sync_missing_heat_stack(sid_to_cluster_mapping[sid])
    except Exception as e:
        LOG.warning(_LW(
            "Ignore error [%s] when syncing up cluster status."
        ), e, exc_info=True)
def get_removal_nodes(self, hosts_output):
    """Pick the hosts to drop when the bay is scaling down.

    Hosts running no pods in the ``default`` namespace are preferred;
    when there are fewer empty hosts than nodes to remove, a warning is
    emitted before returning the (shorter) candidate list.

    :param hosts_output: helper exposing the stack output listing hosts
    :returns: list of host names to remove (empty if not scaling down)
    :raises exception.MagnumException: when the stack output is missing
    """
    if not self._is_scale_down():
        return list()

    bay = self.new_bay
    stack = self.osclient.heat().stacks.get(bay.stack_id)
    hosts = hosts_output.get_output_value(stack)
    if hosts is None:
        raise exception.MagnumException(
            _("Output key '%(output_key)s' is missing from stack "
              "%(stack_id)s") % {'output_key': hosts_output.heat_output,
                                 'stack_id': stack.id})

    # Start with every host, then discard each one that runs a pod.
    idle_hosts = list(hosts)
    k8s_api = k8s.create_k8s_api(self.context, bay)
    for pod in k8s_api.list_namespaced_pod(namespace='default').items:
        node = pod.spec.node_name
        if node in idle_hosts:
            idle_hosts.remove(node)
    LOG.debug('List of hosts that has no container: %s',
              str(idle_hosts))

    num_of_removal = self._get_num_of_removal()
    if len(idle_hosts) < num_of_removal:
        LOG.warning(
            _LW("About to remove %(num_removal)d nodes, which is larger than "
                "the number of empty nodes (%(num_empty)d). %(num_non_empty)d "
                "non-empty nodes will be removed."), {
                    'num_removal': num_of_removal,
                    'num_empty': len(idle_hosts),
                    'num_non_empty': num_of_removal - len(idle_hosts)})

    hosts_to_remove = idle_hosts[0:num_of_removal]
    LOG.info(_LI('Require removal of hosts: %s'), hosts_to_remove)

    return hosts_to_remove