def gnocchi_is_enabled(self):
    if self._gnocchi_is_enabled is None:
        if pecan.request.cfg.api.gnocchi_is_enabled is not None:
            self._gnocchi_is_enabled = (
                pecan.request.cfg.api.gnocchi_is_enabled)

        elif ("gnocchi" not in pecan.request.cfg.meter_dispatchers
              or "database" in pecan.request.cfg.meter_dispatchers):
            self._gnocchi_is_enabled = False
        else:
            try:
                catalog = keystone_client.get_service_catalog(
                    keystone_client.get_client(pecan.request.cfg))
                catalog.url_for(service_type='metric')
            except exceptions.EndpointNotFound:
                self._gnocchi_is_enabled = False
            except exceptions.ClientException:
                LOG.warning(_LW("Can't connect to keystone, assuming "
                                "gnocchi is disabled and retry later"))
            else:
                self._gnocchi_is_enabled = True
                LOG.warning(_LW("ceilometer-api started with gnocchi "
                                "enabled. The resources/meters/samples "
                                "URLs are disabled."))
    return self._gnocchi_is_enabled
def panko_url(self):
    if self._panko_url is None:
        if pecan.request.cfg.api.panko_is_enabled is False:
            self._panko_url = ""
        elif pecan.request.cfg.api.panko_url is not None:
            self._panko_url = self._normalize_url(
                pecan.request.cfg.api.panko_url)
        else:
            try:
                catalog = keystone_client.get_service_catalog(
                    keystone_client.get_client(pecan.request.cfg))
                self._panko_url = self._normalize_url(
                    catalog.url_for(service_type='event'))
            except exceptions.EndpointNotFound:
                self._panko_url = ""
            except exceptions.ClientException:
                LOG.warning(
                    _LW("Can't connect to keystone, assuming Panko "
                        "is disabled and retry later."))
            else:
                LOG.warning(
                    _LW("ceilometer-api started with Panko "
                        "enabled. Events URLs will be redirected "
                        "to Panko endpoint."))
    return self._panko_url
def handle_sample(self, context, s):
    """Handle a sample, converting if necessary."""
    key = s.name + s.resource_id
    prev = self.cache.get(key)
    timestamp = timeutils.parse_isotime(s.timestamp)
    self.cache[key] = (s.volume, timestamp)

    if prev:
        prev_volume = prev[0]
        prev_timestamp = prev[1]
        time_delta = timeutils.delta_seconds(prev_timestamp, timestamp)
        # disallow violations of the arrow of time
        if time_delta < 0:
            LOG.warning(_LW('Dropping out of time order sample: %s'),
                        (s,))
            # Reset the cache to the newer sample.
            self.cache[key] = prev
            return None
        volume_delta = s.volume - prev_volume
        if self.growth_only and volume_delta < 0:
            LOG.warning(_LW('Negative delta detected, dropping value'))
            s = None
        else:
            s = self._convert(s, volume_delta)
            LOG.debug('Converted to: %s', s)
    else:
        LOG.warning(_LW('Dropping sample with no predecessor: %s'),
                    (s,))
        s = None
    return s
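# A minimal, standalone sketch (not project code) of the cache-keyed delta
# logic above: the first sample for a key yields nothing, later samples
# yield the raw volume difference, and out-of-order timestamps are
# rejected. The real transformer wraps the delta back into a sample via
# self._convert(); the names below are illustrative only.
import datetime

cache = {}


def delta(name, resource_id, volume, timestamp):
    key = name + resource_id
    prev = cache.get(key)
    cache[key] = (volume, timestamp)
    if prev is None:
        return None  # no predecessor to diff against
    prev_volume, prev_timestamp = prev
    if (timestamp - prev_timestamp).total_seconds() < 0:
        cache[key] = prev  # keep the newer sample cached
        return None
    return volume - prev_volume


t0 = datetime.datetime(2017, 1, 1, 12, 0, 0)
t1 = t0 + datetime.timedelta(minutes=10)
assert delta('cpu', 'inst-1', 100, t0) is None  # first sample is dropped
assert delta('cpu', 'inst-1', 160, t1) == 60    # delta between two polls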
def _validate_volume(self, s):
    volume = s.volume
    if volume is None:
        LOG.warning(_LW(
            'metering data %(counter_name)s for %(resource_id)s '
            '@ %(timestamp)s has no volume (volume: None), the sample will'
            ' be dropped')
            % {'counter_name': s.name,
               'resource_id': s.resource_id,
               'timestamp': s.timestamp if s.timestamp else 'NO TIMESTAMP'}
        )
        return False

    if not isinstance(volume, (int, float)):
        try:
            volume = float(volume)
        except (ValueError, TypeError):
            # float() raises TypeError (not ValueError) for non-string,
            # non-numeric input, so catch both to drop the sample
            # instead of crashing.
            LOG.warning(_LW(
                'metering data %(counter_name)s for %(resource_id)s '
                '@ %(timestamp)s has volume which is not a number '
                '(volume: %(counter_volume)s), the sample will be dropped')
                % {'counter_name': s.name,
                   'resource_id': s.resource_id,
                   'timestamp': (
                       s.timestamp if s.timestamp else 'NO TIMESTAMP'),
                   'counter_volume': volume}
            )
            return False
    return True
def gnocchi_is_enabled(self):
    if self._gnocchi_is_enabled is None:
        if cfg.CONF.api.gnocchi_is_enabled is not None:
            self._gnocchi_is_enabled = cfg.CONF.api.gnocchi_is_enabled

        elif ("gnocchi" not in cfg.CONF.dispatcher
              or "database" in cfg.CONF.dispatcher):
            self._gnocchi_is_enabled = False
        else:
            try:
                ks = keystone_client.get_client()
                ks.service_catalog.url_for(service_type='metric')
            except exceptions.EndpointNotFound:
                self._gnocchi_is_enabled = False
            except exceptions.ClientException:
                LOG.warning(_LW("Can't connect to keystone, assuming "
                                "gnocchi is disabled and retry later"))
            else:
                self._gnocchi_is_enabled = True
                LOG.warning(_LW("ceilometer-api started with gnocchi "
                                "enabled. The resources/meters/samples "
                                "URLs are disabled."))
    return self._gnocchi_is_enabled
def aodh_url(self):
    if self._aodh_url is None:
        if cfg.CONF.api.aodh_is_enabled is False:
            self._aodh_url = ""
        elif cfg.CONF.api.aodh_url is not None:
            self._aodh_url = self._normalize_aodh_url(
                cfg.CONF.api.aodh_url)
        else:
            try:
                catalog = keystone_client.get_service_catalog(
                    keystone_client.get_client())
                self._aodh_url = self._normalize_aodh_url(
                    catalog.url_for(service_type='alarming'))
            except exceptions.EndpointNotFound:
                self._aodh_url = ""
            except exceptions.ClientException:
                LOG.warning(_LW("Can't connect to keystone, assuming aodh "
                                "is disabled and retry later."))
            else:
                LOG.warning(_LW("ceilometer-api started with aodh "
                                "enabled. Alarms URLs will be redirected "
                                "to aodh endpoint."))
    return self._aodh_url
def get_samples(self, manager, cache, resources):
    self._inspection_duration = self._record_poll_time()
    for instance in resources:
        LOG.debug('Checking memory usage for instance %s', instance.id)
        try:
            memory_info = self.inspector.inspect_memory_usage(
                instance, self._inspection_duration)
            LOG.debug("MEMORY USAGE: %(instance)s %(usage)f",
                      {'instance': instance,
                       'usage': memory_info.usage})
            yield util.make_sample_from_instance(
                self.conf,
                instance,
                name='memory.usage',
                type=sample.TYPE_GAUGE,
                unit='MB',
                volume=memory_info.usage,
            )
        except virt_inspector.InstanceNotFoundException as err:
            # Instance was deleted while getting samples. Ignore it.
            LOG.debug('Exception while getting samples %s', err)
        except virt_inspector.InstanceShutOffException as e:
            LOG.debug('Instance %(instance_id)s was shut off while '
                      'getting samples of %(pollster)s: %(exc)s',
                      {'instance_id': instance.id,
                       'pollster': self.__class__.__name__,
                       'exc': e})
        except virt_inspector.InstanceNoDataException as e:
            LOG.warning(_LW('Cannot inspect data of %(pollster)s for '
                            '%(instance_id)s, non-fatal reason: %(exc)s'),
                        {'pollster': self.__class__.__name__,
                         'instance_id': instance.id,
                         'exc': e})
        except virt_inspector.NoDataException as e:
            LOG.warning(_LW('Cannot inspect data of %(pollster)s for '
                            '%(instance_id)s: %(exc)s'),
                        {'pollster': self.__class__.__name__,
                         'instance_id': instance.id,
                         'exc': e})
            raise plugin_base.PollsterPermanentError(resources)
        except ceilometer.NotImplementedError:
            # Selected inspector does not implement this pollster.
            LOG.debug('Obtaining Memory Usage is not implemented for %s',
                      self.inspector.__class__.__name__)
            raise plugin_base.PollsterPermanentError(resources)
        except Exception as err:
            LOG.exception(_LE('Could not get Memory Usage for '
                              '%(id)s: %(e)s'), {'id': instance.id,
                                                 'e': err})
def discover(manager, param=None):
    endpoints = keystone_client.get_service_catalog(
        manager.keystone).get_urls(
            service_type=param,
            interface=cfg.CONF.service_credentials.interface,
            region_name=cfg.CONF.service_credentials.region_name)
    # NOTE: plain lazy debug logging; _LW() is for translated warnings
    # and takes a single format string, not extra arguments.
    LOG.debug('endpoints = %s', endpoints)
    if not endpoints:
        LOG.warning(_LW('No endpoints found for service %s'),
                    "<all services>" if param is None else param)
        return []
    return endpoints
def get_samples(self, manager, cache, resources):
    self._inspection_duration = self._record_poll_time()
    for instance in resources:
        LOG.debug(_('Checking resident memory for instance %s'),
                  instance.id)
        try:
            memory_info = self.inspector.inspect_memory_resident(
                instance, self._inspection_duration)
            LOG.debug(_("RESIDENT MEMORY: %(instance)s %(resident)f"),
                      ({'instance': instance.__dict__,
                        'resident': memory_info.resident}))
            yield util.make_sample_from_instance(
                instance,
                name='memory.resident',
                type=sample.TYPE_GAUGE,
                unit='MB',
                volume=memory_info.resident,
            )
        except virt_inspector.InstanceNotFoundException as err:
            # Instance was deleted while getting samples. Ignore it.
            LOG.debug(_('Exception while getting samples %s'), err)
        except virt_inspector.InstanceShutOffException as e:
            LOG.warning(_LW('Instance %(instance_id)s was shut off while '
                            'getting samples of %(pollster)s: %(exc)s'),
                        {'instance_id': instance.id,
                         'pollster': self.__class__.__name__,
                         'exc': e})
        except virt_inspector.NoDataException as e:
            LOG.warning(_LW('Cannot inspect data of %(pollster)s for '
                            '%(instance_id)s, non-fatal reason: %(exc)s'),
                        {'pollster': self.__class__.__name__,
                         'instance_id': instance.id,
                         'exc': e})
        except ceilometer.NotImplementedError:
            # Selected inspector does not implement this pollster.
            LOG.debug(_('Obtaining Resident Memory is not implemented'
                        ' for %s'), self.inspector.__class__.__name__)
        except Exception as err:
            LOG.exception(_LE('Could not get Resident Memory Usage for '
                              '%(id)s: %(e)s'), {'id': instance.id,
                                                 'e': err})
def discover(self, discovery=None, discovery_cache=None):
    resources = []
    discovery = discovery or []
    for url in discovery:
        if discovery_cache is not None and url in discovery_cache:
            resources.extend(discovery_cache[url])
            continue
        name, param = self._parse_discoverer(url)
        discoverer = self._discoverer(name)
        if discoverer:
            try:
                if discoverer.KEYSTONE_REQUIRED_FOR_SERVICE:
                    service_type = getattr(
                        self.conf.service_types,
                        discoverer.KEYSTONE_REQUIRED_FOR_SERVICE)
                    if not keystone_client.get_service_catalog(
                            self.keystone).get_endpoints(
                                service_type=service_type):
                        LOG.warning(_LW(
                            'Skipping %(name)s, %(service_type)s service '
                            'is not registered in keystone'),
                            {'name': name, 'service_type': service_type})
                        continue

                discovered = discoverer.discover(self, param)

                if self.partition_coordinator:
                    partitioned = (
                        self.partition_coordinator.extract_my_subset(
                            self.construct_group_id(discoverer.group_id),
                            discovered))
                else:
                    partitioned = discovered

                resources.extend(partitioned)
                if discovery_cache is not None:
                    discovery_cache[url] = partitioned
            except ka_exceptions.ClientException as e:
                LOG.error(_LE('Skipping %(name)s, keystone issue: '
                              '%(exc)s'), {'name': name, 'exc': e})
            except Exception as err:
                LOG.exception(_LE('Unable to discover resources: %s'),
                              err)
        else:
            LOG.warning(_LW('Unknown discovery extension: %s'), name)
    return resources
def inspect_disk_info(self, instance):
    domain = self._get_domain_not_shut_off_or_raise(instance)
    tree = etree.fromstring(domain.XMLDesc(0))
    for disk in tree.findall('devices/disk'):
        disk_type = disk.get('type')
        if disk_type:
            if disk_type == 'network':
                LOG.warning(
                    _LW('Inspection disk usage of network disk '
                        '%(instance_uuid)s unsupported by libvirt') % {
                            'instance_uuid': instance.id})
                continue
            # NOTE(lhx): "cdrom" device associated to the configdrive
            # no longer has a "source" element. Related bug:
            # https://bugs.launchpad.net/ceilometer/+bug/1622718
            if disk.find('source') is None:
                continue
            target = disk.find('target')
            device = target.get('dev')
            if device:
                dsk = virt_inspector.Disk(device=device)
                block_info = domain.blockInfo(device)
                info = virt_inspector.DiskInfo(capacity=block_info[0],
                                               allocation=block_info[1],
                                               physical=block_info[2])
                yield (dsk, info)
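# Illustration only: the rough shape of the libvirt domain XML that
# inspect_disk_info() walks. Device names and paths are made up; note the
# "cdrom" disk without a <source> element, which the code above skips.
from lxml import etree

DOMAIN_XML = """<domain>
  <devices>
    <disk type='file' device='disk'>
      <source file='/var/lib/nova/instances/some-uuid/disk'/>
      <target dev='vda' bus='virtio'/>
    </disk>
    <disk type='file' device='cdrom'>
      <target dev='hda' bus='ide'/>
    </disk>
  </devices>
</domain>"""

tree = etree.fromstring(DOMAIN_XML)
for disk in tree.findall('devices/disk'):
    has_source = disk.find('source') is not None
    print(disk.get('type'), disk.find('target').get('dev'), has_source)
# prints: file vda True
#         file hda False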
def __call__(self, *args, **kwargs):
    result = super(DeduplicatedCfgList, self).__call__(*args, **kwargs)
    if len(result) != len(set(result)):
        LOG.warning(_LW("Duplicated values: %s found in CLI options, "
                        "auto de-duplicated"), result)
        result = list(set(result))
    return result
def _sufficient(self, alarm, statistics):
    """Check for the sufficiency of the data for evaluation.

    Ensure there is sufficient data for evaluation, transitioning to
    unknown otherwise.
    """
    sufficient = len(statistics) >= alarm.rule['evaluation_periods']
    if not sufficient and alarm.state != evaluator.UNKNOWN:
        LOG.warning(_LW('Expecting %(expected)d datapoints but only got '
                        '%(actual)d')
                    % {'expected': alarm.rule['evaluation_periods'],
                       'actual': len(statistics)})
        # The reason is not the same as the log message because we want
        # to keep it consistent: third-party software may depend on the
        # old format.
        reason = _('%d datapoints are unknown') % alarm.rule[
            'evaluation_periods']
        last = None if not statistics else (
            getattr(statistics[-1], alarm.rule['statistic']))
        reason_data = self._reason_data('unknown',
                                        alarm.rule['evaluation_periods'],
                                        last)
        self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data)
    return sufficient
def __init__(self, conf, parsed_url):
    super(DirectPublisher, self).__init__(conf, parsed_url)
    default_dispatcher = parsed_url.scheme
    if default_dispatcher == 'direct':
        LOG.warning(_LW('Direct publisher is deprecated for removal. Use '
                        'an explicit publisher instead, e.g. "gnocchi", '
                        '"database", "file", ...'))
        default_dispatcher = 'database'
    options = urlparse.parse_qs(parsed_url.query)
    self.dispatcher_name = options.get('dispatcher',
                                       [default_dispatcher])[-1]
    self._sample_dispatcher = None
    self._event_dispatcher = None

    try:
        self.sample_driver = driver.DriverManager(
            'ceilometer.dispatcher.meter', self.dispatcher_name).driver
    except stevedore.exception.NoMatches:
        self.sample_driver = None

    try:
        self.event_driver = driver.DriverManager(
            'ceilometer.dispatcher.event', self.dispatcher_name).driver
    except stevedore.exception.NoMatches:
        self.event_driver = None
def get_samples(self, manager, cache, resources):
    self._inspection_duration = self._record_poll_time()
    for instance in resources:
        instance_name = util.instance_name(instance)
        LOG.debug('checking net info for instance %s', instance.id)
        try:
            vnics = self._get_vnics_for_instance(
                cache,
                self.inspector,
                instance,
            )
            for vnic, info in vnics:
                LOG.debug(self.NET_USAGE_MESSAGE, instance_name,
                          vnic.name, self._get_rx_info(info),
                          self._get_tx_info(info))
                yield self._get_sample(instance, vnic, info)
        except virt_inspector.InstanceNotFoundException as err:
            # Instance was deleted while getting samples. Ignore it.
            LOG.debug('Exception while getting samples %s', err)
        except virt_inspector.InstanceShutOffException as e:
            LOG.warning(_LW('Instance %(instance_id)s was shut off while '
                            'getting samples of %(pollster)s: %(exc)s'),
                        {'instance_id': instance.id,
                         'pollster': self.__class__.__name__,
                         'exc': e})
        except ceilometer.NotImplementedError:
            # Selected inspector does not implement this pollster.
            LOG.debug('%(inspector)s does not provide data for '
                      ' %(pollster)s',
                      {'inspector': self.inspector.__class__.__name__,
                       'pollster': self.__class__.__name__})
        except Exception as err:
            LOG.exception(_('Ignoring instance %(name)s: %(error)s'),
                          {'name': instance_name, 'error': err})
def __init__(self, parsed_url):
    self.kafka_client = None

    self.host, self.port = netutils.parse_host_port(
        parsed_url.netloc, default_port=9092)

    self.local_queue = []

    params = urlparse.parse_qs(parsed_url.query)
    self.topic = params.get('topic', ['ceilometer'])[-1]
    self.policy = params.get('policy', ['default'])[-1]
    self.max_queue_length = int(params.get(
        'max_queue_length', [1024])[-1])
    self.max_retry = int(params.get('max_retry', [100])[-1])

    if self.policy in ['default', 'drop', 'queue']:
        LOG.info(_LI('Publishing policy set to %s') % self.policy)
    else:
        LOG.warning(_LW('Publishing policy is unknown (%s), forcing it '
                        'to default') % self.policy)
        self.policy = 'default'

    try:
        self._get_client()
    except Exception as e:
        LOG.exception(_LE("Failed to connect to Kafka service: %s"), e)
def __init__(self, conf):
    super(GnocchiDispatcher, self).__init__(conf)
    self.conf = conf
    self.filter_service_activity = (
        conf.dispatcher_gnocchi.filter_service_activity)
    self._ks_client = keystone_client.get_client(conf)
    self.resources_definition = self._load_resources_definitions(conf)

    self.cache = None
    try:
        import oslo_cache
        oslo_cache.configure(self.conf)
        # NOTE(cdent): The default cache backend is a real but
        # noop backend. We don't want to use that here because
        # we want to avoid the cache pathways entirely if the
        # cache has not been configured explicitly.
        if self.conf.cache.enabled:
            cache_region = oslo_cache.create_region()
            self.cache = oslo_cache.configure_cache_region(
                self.conf, cache_region)
            self.cache.key_mangler = cache_key_mangler
    except ImportError:
        pass
    except oslo_cache.exception.ConfigurationError as exc:
        LOG.warning(_LW('unable to configure oslo_cache: %s'), exc)

    self._gnocchi_project_id = None
    self._gnocchi_project_id_lock = threading.Lock()
    self._gnocchi_resource_lock = LockedDefaultDict(threading.Lock)

    self._gnocchi = gnocchi_client.get_gnocchiclient(conf)
def extract_my_subset(self, group_id, iterable, attempt=0): """Filters an iterable, returning only objects assigned to this agent. We have a list of objects and get a list of active group members from `tooz`. We then hash all the objects into buckets and return only the ones that hashed into *our* bucket. """ if not group_id: return iterable if group_id not in self._groups: self.join_group(group_id) try: members = self._get_members(group_id) LOG.debug('Members of group: %s, Me: %s', members, self._my_id) if self._my_id not in members: LOG.warning( _LW('Cannot extract tasks because agent failed to ' 'join group properly. Rejoining group.')) self.join_group(group_id) members = self._get_members(group_id) if self._my_id not in members: raise MemberNotInGroupError(group_id, members, self._my_id) hr = utils.HashRing(members) filtered = [ v for v in iterable if hr.get_node(str(v)) == self._my_id ] LOG.debug('My subset: %s', [str(f) for f in filtered]) return filtered except tooz.coordination.ToozError: LOG.exception( _LE('Error getting group membership info from ' 'coordination backend.')) return []
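# A standalone sketch of the partitioning idea behind extract_my_subset():
# every agent hashes the same member list against the same resource set,
# so each resource lands in exactly one agent's bucket without any extra
# coordination traffic. This uses simple rendezvous hashing as a stand-in
# for utils.HashRing, whose ring-based implementation differs in detail.
import hashlib


def pick_member(members, value):
    def score(member):
        return hashlib.md5((member + value).encode('utf-8')).hexdigest()
    return min(members, key=score)


members = ['agent-1', 'agent-2', 'agent-3']
resources = ['inst-a', 'inst-b', 'inst-c', 'inst-d']
my_id = 'agent-2'
my_subset = [r for r in resources if pick_member(members, r) == my_id]
# Each agent computes the same assignment locally; the union of all
# agents' subsets covers every resource exactly once.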
def record_metering_data(self, data):
    # We may have received only one counter on the wire
    if not isinstance(data, list):
        data = [data]

    for meter in data:
        LOG.debug(_(
            'metering data %(counter_name)s '
            'for %(resource_id)s @ %(timestamp)s: %(counter_volume)s')
            % ({'counter_name': meter['counter_name'],
                'resource_id': meter['resource_id'],
                'timestamp': meter.get('timestamp', 'NO TIMESTAMP'),
                'counter_volume': meter['counter_volume']}))
        if publisher_utils.verify_signature(
                meter, self.conf.publisher.telemetry_secret):
            try:
                # Convert the timestamp to a datetime instance.
                # Storage engines are responsible for converting
                # that value to something they can store.
                if meter.get('timestamp'):
                    ts = timeutils.parse_isotime(meter['timestamp'])
                    meter['timestamp'] = timeutils.normalize_time(ts)
                self.meter_conn.record_metering_data(meter)
            except Exception as err:
                LOG.exception(_LE('Failed to record metering data: %s'),
                              err)
                # raise the exception to propagate it up in the chain.
                raise
        else:
            LOG.warning(_LW(
                'message signature invalid, discarding message: %r'),
                meter)
def record_events(self, events):
    if not isinstance(events, list):
        events = [events]

    event_list = []
    for ev in events:
        if publisher_utils.verify_signature(
                ev, self.conf.publisher.telemetry_secret):
            try:
                event_list.append(
                    models.Event(
                        message_id=ev['message_id'],
                        event_type=ev['event_type'],
                        generated=timeutils.normalize_time(
                            timeutils.parse_isotime(ev['generated'])),
                        traits=[models.Trait(
                                name, dtype,
                                models.Trait.convert_value(dtype, value))
                                for name, dtype, value in ev['traits']],
                        raw=ev.get('raw', {}))
                )
            except Exception:
                LOG.exception(_LE("Error processing event and it will be "
                                  "dropped: %s"), ev)
        else:
            LOG.warning(_LW(
                'event signature invalid, discarding event: %s'), ev)
    self.event_conn.record_events(event_list)
def start(self):
    super(NotificationService, self).start()
    self.partition_coordinator = None
    self.coord_lock = threading.Lock()

    self.listeners = []

    # NOTE(kbespalov): the pipeline queues use a single amqp host,
    # hence only one listener is required
    self.pipeline_listener = None

    self.pipeline_manager = pipeline.setup_pipeline()

    if cfg.CONF.notification.store_events:
        self.event_pipeline_manager = pipeline.setup_event_pipeline()

    self.transport = messaging.get_transport()

    if cfg.CONF.notification.workload_partitioning:
        self.ctxt = context.get_admin_context()
        self.group_id = self.NOTIFICATION_NAMESPACE
        self.partition_coordinator = coordination.PartitionCoordinator()
        self.partition_coordinator.start()
    else:
        # FIXME(sileht): endpoint uses the notification_topics option
        # and it should not because this is an oslo_messaging option
        # not a ceilometer. Until we have something to get the
        # notification_topics in another way, we must create a transport
        # to ensure the option has been registered by oslo_messaging.
        messaging.get_notifier(self.transport, '')
        self.group_id = None

    self.pipe_manager = self._get_pipe_manager(self.transport,
                                               self.pipeline_manager)
    self.event_pipe_manager = self._get_event_pipeline_manager(
        self.transport)

    self._configure_main_queue_listeners(self.pipe_manager,
                                         self.event_pipe_manager)

    if cfg.CONF.notification.workload_partitioning:
        # join group after all manager set up is configured
        self.partition_coordinator.join_group(self.group_id)
        self.partition_coordinator.watch_group(self.group_id,
                                               self._refresh_agent)
        self.tg.add_timer(cfg.CONF.coordination.heartbeat,
                          self.partition_coordinator.heartbeat)
        self.tg.add_timer(cfg.CONF.coordination.check_watchers,
                          self.partition_coordinator.run_watchers)
        # configure pipelines after all coordination is configured.
        self._configure_pipeline_listener()

    if not cfg.CONF.notification.disable_non_metric_meters:
        LOG.warning(_LW('Non-metric meters may be collected. It is highly '
                        'advisable to disable these meters using '
                        'ceilometer.conf or the pipeline.yaml'))

    # Add a dummy thread to have wait() working
    self.tg.add_timer(604800, lambda: None)

    self.init_pipeline_refresh()
def _check_queue_length(self):
    queue_length = len(self.local_queue)
    if queue_length > self.max_queue_length > 0:
        diff = queue_length - self.max_queue_length
        self.local_queue = self.local_queue[diff:]
        LOG.warning(_LW("Kafka Publisher max local queue length is "
                        "exceeded, dropping %d oldest data") % diff)
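# Behavior sketch for the queue trimming above: with max_queue_length = 3,
# a queue holding five batches is cut from the front, dropping the two
# oldest entries and keeping the most recent ones.
local_queue = ['m1', 'm2', 'm3', 'm4', 'm5']
max_queue_length = 3
diff = len(local_queue) - max_queue_length
local_queue = local_queue[diff:]
assert local_queue == ['m3', 'm4', 'm5']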
def start_udp(self): address_family = socket.AF_INET if netutils.is_valid_ipv6(cfg.CONF.collector.udp_address): address_family = socket.AF_INET6 udp = socket.socket(address_family, socket.SOCK_DGRAM) udp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) udp.bind((cfg.CONF.collector.udp_address, cfg.CONF.collector.udp_port)) self.udp_run = True while self.udp_run: # NOTE(jd) Arbitrary limit of 64K because that ought to be # enough for anybody. data, source = udp.recvfrom(64 * units.Ki) try: sample = msgpack.loads(data, encoding='utf-8') except Exception: LOG.warning(_("UDP: Cannot decode data sent by %s"), source) else: if publisher_utils.verify_signature( sample, cfg.CONF.publisher.telemetry_secret): try: LOG.debug("UDP: Storing %s", sample) self.meter_manager.map_method( 'record_metering_data', sample) except Exception: LOG.exception(_("UDP: Unable to store meter")) else: LOG.warning(_LW('sample signature invalid, ' 'discarding: %s'), sample)
def setup_app(pecan_config=None):
    # FIXME: Replace DBHook with a hooks.TransactionHook
    app_hooks = [hooks.ConfigHook(),
                 hooks.DBHook(),
                 hooks.NotifierHook(),
                 hooks.TranslationHook()]

    pecan_config = pecan_config or {
        "app": {
            'root': 'ceilometer.api.controllers.root.RootController',
            'modules': ['ceilometer.api'],
        }
    }

    pecan.configuration.set_config(dict(pecan_config), overwrite=True)

    # NOTE(sileht): pecan debug won't work in multi-process environment
    pecan_debug = CONF.api.pecan_debug
    if CONF.api.workers and CONF.api.workers != 1 and pecan_debug:
        pecan_debug = False
        LOG.warning(_LW('pecan_debug cannot be enabled when workers is '
                        '> 1; the value is overridden with False'))

    app = pecan.make_app(
        pecan_config['app']['root'],
        debug=pecan_debug,
        hooks=app_hooks,
        wrap_app=middleware.ParsableErrorMiddleware,
        guess_content_type_from_ext=False
    )

    return app
def _load_definitions(self):
    plugin_manager = extension.ExtensionManager(
        namespace='ceilometer.event.trait_plugin')
    meters_cfg = declarative.load_definitions(
        self.manager.conf, {},
        self.manager.conf.meter.meter_definitions_cfg_file,
        pkg_resources.resource_filename(__name__, "data/meters.yaml"))

    definitions = {}
    disable_non_metric_meters = (
        self.manager.conf.notification.disable_non_metric_meters)
    for meter_cfg in reversed(meters_cfg['metric']):
        if meter_cfg.get('name') in definitions:
            # skip duplicate meters
            LOG.warning(_LW("Skipping duplicate meter definition %s")
                        % meter_cfg)
            continue
        if (meter_cfg.get('volume') != 1
                or not disable_non_metric_meters):
            try:
                md = MeterDefinition(meter_cfg, plugin_manager)
            except declarative.DefinitionException as e:
                errmsg = _LE("Error loading meter definition: %s")
                LOG.error(errmsg, six.text_type(e))
            else:
                definitions[meter_cfg['name']] = md
    return definitions.values()
def get_samples(self, manager, cache, resources):
    self._inspection_duration = self._record_poll_time()
    for instance in resources:
        try:
            c_data = self._populate_cache(
                self.inspector,
                cache,
                instance,
            )
            for s in self._get_samples(instance, c_data):
                yield s
        except virt_inspector.InstanceNotFoundException as err:
            # Instance was deleted while getting samples. Ignore it.
            LOG.debug('Exception while getting samples %s', err)
        except virt_inspector.InstanceShutOffException as e:
            LOG.debug('Instance %(instance_id)s was shut off while '
                      'getting samples of %(pollster)s: %(exc)s',
                      {'instance_id': instance.id,
                       'pollster': self.__class__.__name__, 'exc': e})
        except virt_inspector.NoDataException as e:
            LOG.warning(_LW('Cannot inspect data of %(pollster)s for '
                            '%(instance_id)s, non-fatal reason: %(exc)s'),
                        {'pollster': self.__class__.__name__,
                         'instance_id': instance.id, 'exc': e})
            raise plugin_base.PollsterPermanentError(resources)
        except ceilometer.NotImplementedError:
            # Selected inspector does not implement this pollster.
            LOG.debug('Obtaining memory bandwidth is not implemented'
                      ' for %s', self.inspector.__class__.__name__)
        except Exception as err:
            LOG.exception(_LE('Could not get memory bandwidth for '
                              '%(id)s: %(e)s'), {'id': instance.id,
                                                 'e': err})
def get_samples(self, manager, cache, resources):
    for instance in resources:
        try:
            disk_size_info = self._populate_cache(
                self.inspector,
                cache,
                instance,
            )
            for disk_info in self._get_samples(instance, disk_size_info):
                yield disk_info
        except virt_inspector.InstanceNotFoundException as err:
            # Instance was deleted while getting samples. Ignore it.
            LOG.debug('Exception while getting samples %s', err)
        except virt_inspector.InstanceShutOffException as e:
            LOG.warning(_LW('Instance %(instance_id)s was shut off while '
                            'getting samples of %(pollster)s: %(exc)s'),
                        {'instance_id': instance.id,
                         'pollster': self.__class__.__name__, 'exc': e})
        except ceilometer.NotImplementedError:
            # Selected inspector does not implement this pollster.
            LOG.debug('%(inspector)s does not provide data for '
                      ' %(pollster)s',
                      {'inspector': self.inspector.__class__.__name__,
                       'pollster': self.__class__.__name__})
        except Exception as err:
            instance_name = util.instance_name(instance)
            LOG.exception(_('Ignoring instance %(name)s '
                            '(%(instance_id)s) : %(error)s') % (
                          {'name': instance_name,
                           'instance_id': instance.id,
                           'error': err}))
def __init__(self, conf):
    super(GnocchiDispatcher, self).__init__(conf)
    self.conf = conf
    self.filter_service_activity = (
        conf.dispatcher_gnocchi.filter_service_activity)
    self._ks_client = keystone_client.get_client()
    self.resources_definition = self._load_resources_definitions(conf)

    self.cache = None
    try:
        import oslo_cache
        oslo_cache.configure(self.conf)
        # NOTE(cdent): The default cache backend is a real but
        # noop backend. We don't want to use that here because
        # we want to avoid the cache pathways entirely if the
        # cache has not been configured explicitly.
        if 'null' not in self.conf.cache.backend:
            cache_region = oslo_cache.create_region()
            self.cache = oslo_cache.configure_cache_region(
                self.conf, cache_region)
            self.cache.key_mangler = cache_key_mangler
    except ImportError:
        pass
    except oslo_cache.exception.ConfigurationError as exc:
        LOG.warning(_LW('unable to configure oslo_cache: %s') % exc)

    self._gnocchi_project_id = None
    self._gnocchi_project_id_lock = threading.Lock()
    self._gnocchi_resource_lock = threading.Lock()

    self._gnocchi = gnocchi_client.Client(conf.dispatcher_gnocchi.url)
def extract_my_subset(self, group_id, iterable, attempt=0): """Filters an iterable, returning only objects assigned to this agent. We have a list of objects and get a list of active group members from `tooz`. We then hash all the objects into buckets and return only the ones that hashed into *our* bucket. """ if not group_id: return iterable if group_id not in self._groups: self.join_group(group_id) try: members = self._get_members(group_id) LOG.debug('Members of group: %s, Me: %s', members, self._my_id) if self._my_id not in members: raise tooz.coordination.MemberNotJoined(group_id, self._my_id) hr = utils.HashRing(members) filtered = [v for v in iterable if hr.get_node(str(v)) == self._my_id] LOG.debug('My subset: %s', [str(f) for f in filtered]) return filtered except tooz.coordination.MemberNotJoined: if attempt >= 5: raise LOG.warning(_LW('Cannot extract tasks because agent failed to ' 'join group properly. Rejoining group.')) self.join_group(group_id) return self.extract_my_subset(group_id, iterable, attempt + 1) except tooz.coordination.ToozError: LOG.exception(_LE('Error getting group membership info from ' 'coordination backend.')) return []
def _load_definitions():
    plugin_manager = extension.ExtensionManager(
        namespace='ceilometer.event.trait_plugin')
    meters_cfg = declarative.load_definitions(
        {}, cfg.CONF.meter.meter_definitions_cfg_file,
        pkg_resources.resource_filename(__name__, "data/meters.yaml"))

    definitions = {}
    for meter_cfg in reversed(meters_cfg['metric']):
        if meter_cfg.get('name') in definitions:
            # skip duplicate meters
            LOG.warning(_LW("Skipping duplicate meter definition %s")
                        % meter_cfg)
            continue
        if (meter_cfg.get('volume') != 1
                or not cfg.CONF.notification.disable_non_metric_meters):
            try:
                md = MeterDefinition(meter_cfg, plugin_manager)
            except declarative.DefinitionException as me:
                errmsg = (_LE("Error loading meter definition: %(err)s")
                          % dict(err=six.text_type(me)))
                LOG.error(errmsg)
            else:
                definitions[meter_cfg['name']] = md
    return definitions.values()
def start(self):
    super(NotificationService, self).start()
    self.pipeline_manager = pipeline.setup_pipeline()

    if cfg.CONF.notification.store_events:
        self.event_pipeline_manager = pipeline.setup_event_pipeline()

    transport = messaging.get_transport()
    self.partition_coordinator = coordination.PartitionCoordinator()
    self.partition_coordinator.start()

    event_pipe_manager = None
    if cfg.CONF.notification.workload_partitioning:
        pipe_manager = pipeline.SamplePipelineTransportManager()
        for pipe in self.pipeline_manager.pipelines:
            pipe_manager.add_transporter(
                (pipe.source.support_meter,
                 self._get_notifier(transport, pipe)))
        if cfg.CONF.notification.store_events:
            event_pipe_manager = pipeline.EventPipelineTransportManager()
            for pipe in self.event_pipeline_manager.pipelines:
                event_pipe_manager.add_transporter(
                    (pipe.source.support_event,
                     self._get_notifier(transport, pipe)))

        self.ctxt = context.get_admin_context()
        self.group_id = self.NOTIFICATION_NAMESPACE
    else:
        # FIXME(sileht): the endpoint uses the notification_topics
        # option and it should not, because that is an oslo_messaging
        # option, not a ceilometer one. Until we have another way to
        # get notification_topics, we must create a transport to ensure
        # the option has been registered by oslo_messaging.
        messaging.get_notifier(transport, '')
        pipe_manager = self.pipeline_manager
        if cfg.CONF.notification.store_events:
            event_pipe_manager = self.event_pipeline_manager
        self.group_id = None

    self.listeners, self.pipeline_listeners = [], []
    self._configure_main_queue_listeners(pipe_manager,
                                         event_pipe_manager)

    if cfg.CONF.notification.workload_partitioning:
        self.partition_coordinator.join_group(self.group_id)
        self._configure_pipeline_listeners()
        self.partition_coordinator.watch_group(self.group_id,
                                               self._refresh_agent)
        self.tg.add_timer(cfg.CONF.coordination.heartbeat,
                          self.partition_coordinator.heartbeat)
        self.tg.add_timer(cfg.CONF.coordination.check_watchers,
                          self.partition_coordinator.run_watchers)

    if not cfg.CONF.notification.disable_non_metric_meters:
        LOG.warning(_LW('Non-metric meters may be collected. It is highly '
                        'advisable to disable these meters using '
                        'ceilometer.conf or the pipeline.yaml'))

    # Add a dummy thread to have wait() working
    self.tg.add_timer(604800, lambda: None)
def record_metering_data(self, data): # We may have receive only one counter on the wire if not isinstance(data, list): data = [data] for meter in data: LOG.debug( "metering data %(counter_name)s " "for %(resource_id)s @ %(timestamp)s: %(counter_volume)s", { "counter_name": meter["counter_name"], "resource_id": meter["resource_id"], "timestamp": meter.get("timestamp", "NO TIMESTAMP"), "counter_volume": meter["counter_volume"], }, ) if publisher_utils.verify_signature(meter, self.conf.publisher.telemetry_secret): try: # Convert the timestamp to a datetime instance. # Storage engines are responsible for converting # that value to something they can store. if meter.get("timestamp"): ts = timeutils.parse_isotime(meter["timestamp"]) meter["timestamp"] = timeutils.normalize_time(ts) self.meter_conn.record_metering_data(meter) except Exception as err: LOG.exception(_LE("Failed to record metering data: %s"), err) # raise the exception to propagate it up in the chain. raise else: LOG.warning(_LW("message signature invalid, discarding message: %r"), meter)
def get_samples(self, manager, cache, resources):
    for instance in resources:
        instance_name = util.instance_name(instance)
        try:
            c_data = self._populate_cache(
                self.inspector,
                cache,
                instance,
            )
            for s in self._get_samples(instance, c_data):
                yield s
        except virt_inspector.InstanceNotFoundException as err:
            # Instance was deleted while getting samples. Ignore it.
            LOG.debug('Exception while getting samples %s', err)
        except virt_inspector.InstanceShutOffException as e:
            LOG.warning(_LW('Instance %(instance_id)s was shut off while '
                            'getting samples of %(pollster)s: %(exc)s'),
                        {'instance_id': instance.id,
                         'pollster': self.__class__.__name__, 'exc': e})
        except ceilometer.NotImplementedError:
            # Selected inspector does not implement this pollster.
            LOG.debug('%(inspector)s does not provide data for '
                      ' %(pollster)s',
                      {'inspector': self.inspector.__class__.__name__,
                       'pollster': self.__class__.__name__})
        except Exception as err:
            LOG.exception(_('Ignoring instance %(name)s: %(error)s'),
                          {'name': instance_name, 'error': err})
def setup_app(pecan_config=None, extra_hooks=None):
    # FIXME: Replace DBHook with a hooks.TransactionHook
    app_hooks = [hooks.ConfigHook(),
                 hooks.DBHook(),
                 hooks.NotifierHook(),
                 hooks.TranslationHook()]
    if extra_hooks:
        app_hooks.extend(extra_hooks)

    if not pecan_config:
        pecan_config = get_pecan_config()

    pecan.configuration.set_config(dict(pecan_config), overwrite=True)

    # NOTE(sileht): pecan debug won't work in multi-process environment
    pecan_debug = CONF.api.pecan_debug
    if service.get_workers('api') != 1 and pecan_debug:
        pecan_debug = False
        LOG.warning(_LW('pecan_debug cannot be enabled when workers is '
                        '> 1; the value is overridden with False'))

    app = pecan.make_app(
        pecan_config.app.root,
        debug=pecan_debug,
        force_canonical=getattr(pecan_config.app, 'force_canonical', True),
        hooks=app_hooks,
        wrap_app=middleware.ParsableErrorMiddleware,
        guess_content_type_from_ext=False
    )

    return app
def get_samples(self, manager, cache, resources): self._inspection_duration = self._record_poll_time() for instance in resources: LOG.debug('Checking memory usage for instance %s', instance.id) try: memory_info = self.inspector.inspect_memory_usage( instance, self._inspection_duration) LOG.debug("MEMORY USAGE: %(instance)s %(usage)f", {'instance': instance, 'usage': memory_info.usage}) yield util.make_sample_from_instance( self.conf, instance, name='memory.usage', type=sample.TYPE_GAUGE, unit='MB', volume=memory_info.usage, ) except virt_inspector.InstanceNotFoundException as err: # Instance was deleted while getting samples. Ignore it. LOG.debug('Exception while getting samples %s', err) except virt_inspector.InstanceShutOffException as e: LOG.debug('Instance %(instance_id)s was shut off while ' 'getting samples of %(pollster)s: %(exc)s', {'instance_id': instance.id, 'pollster': self.__class__.__name__, 'exc': e}) except virt_inspector.InstanceNoDataException as e: LOG.warning(_LW('Cannot inspect data of %(pollster)s for ' '%(instance_id)s, non-fatal reason: %(exc)s'), {'pollster': self.__class__.__name__, 'instance_id': instance.id, 'exc': e}) except virt_inspector.NoDataException as e: LOG.warning(_LW('Cannot inspect data of %(pollster)s for ' '%(instance_id)s: %(exc)s'), {'pollster': self.__class__.__name__, 'instance_id': instance.id, 'exc': e}) raise plugin_base.PollsterPermanentError(resources) except ceilometer.NotImplementedError: # Selected inspector does not implement this pollster. LOG.debug('Obtaining Memory Usage is not implemented for %s', self.inspector.__class__.__name__) raise plugin_base.PollsterPermanentError(resources) except Exception as err: LOG.exception(_LE('Could not get Memory Usage for ' '%(id)s: %(e)s'), {'id': instance.id, 'e': err})
def trait_value(self, match_list):
    if len(match_list) != 2:
        LOG.warning(_LW('Timedelta plugin requires two timestamp fields '
                        'to create a timedelta value.'))
        return
    start, end = match_list
    try:
        start_time = timeutils.parse_isotime(start[1])
        end_time = timeutils.parse_isotime(end[1])
    except Exception as err:
        LOG.warning(_LW('Failed to parse date from set fields, both '
                        'fields %(start)s and %(end)s must be datetime: '
                        '%(err)s')
                    % dict(start=start[0], end=end[0], err=err)
                    )
        return
    return abs((end_time - start_time).total_seconds())
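# Usage sketch for the timedelta computation above, with made-up ISO 8601
# timestamps of the kind notification payloads carry. parse_isotime()
# returns timezone-aware datetimes, so the subtraction is well-defined.
from oslo_utils import timeutils

start_time = timeutils.parse_isotime('2016-05-09T10:00:00')
end_time = timeutils.parse_isotime('2016-05-09T10:05:30')
assert abs((end_time - start_time).total_seconds()) == 330.0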
def start(self):
    super(NotificationService, self).start()
    self.partition_coordinator = None
    self.coord_lock = threading.Lock()
    self.listeners, self.pipeline_listeners = [], []

    self.pipeline_manager = pipeline.setup_pipeline()

    if cfg.CONF.notification.store_events:
        self.event_pipeline_manager = pipeline.setup_event_pipeline()

    self.transport = messaging.get_transport()

    if cfg.CONF.notification.workload_partitioning:
        self.ctxt = context.get_admin_context()
        self.group_id = self.NOTIFICATION_NAMESPACE
        self.partition_coordinator = coordination.PartitionCoordinator()
        self.partition_coordinator.start()
    else:
        # FIXME(sileht): endpoint uses the notification_topics option
        # and it should not because this is an oslo_messaging option
        # not a ceilometer. Until we have something to get the
        # notification_topics in another way, we must create a transport
        # to ensure the option has been registered by oslo_messaging.
        messaging.get_notifier(self.transport, '')
        self.group_id = None

    self.pipe_manager = self._get_pipe_manager(self.transport,
                                               self.pipeline_manager)
    self.event_pipe_manager = self._get_event_pipeline_manager(
        self.transport)

    self._configure_main_queue_listeners(self.pipe_manager,
                                         self.event_pipe_manager)

    if cfg.CONF.notification.workload_partitioning:
        # join group after all manager set up is configured
        self.partition_coordinator.join_group(self.group_id)
        self.partition_coordinator.watch_group(self.group_id,
                                               self._refresh_agent)
        self.tg.add_timer(cfg.CONF.coordination.heartbeat,
                          self.partition_coordinator.heartbeat)
        self.tg.add_timer(cfg.CONF.coordination.check_watchers,
                          self.partition_coordinator.run_watchers)
        # configure pipelines after all coordination is configured.
        self._configure_pipeline_listeners()

    if not cfg.CONF.notification.disable_non_metric_meters:
        LOG.warning(_LW('Non-metric meters may be collected. It is highly '
                        'advisable to disable these meters using '
                        'ceilometer.conf or the pipeline.yaml'))

    # Add a dummy thread to have wait() working
    self.tg.add_timer(604800, lambda: None)

    self.init_pipeline_refresh()
def discover(manager, param=None):
    endpoints = manager.keystone.service_catalog.get_urls(
        service_type=param,
        endpoint_type=cfg.CONF.service_credentials.os_endpoint_type,
        region_name=cfg.CONF.service_credentials.os_region_name)
    if not endpoints:
        LOG.warning(_LW('No endpoints found for service %s'),
                    "<all services>" if param is None else param)
        return []
    return endpoints
def start_udp(self):
    address_family = socket.AF_INET
    if netutils.is_valid_ipv6(self.conf.collector.udp_address):
        address_family = socket.AF_INET6
    udp = socket.socket(address_family, socket.SOCK_DGRAM)
    udp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    try:
        # NOTE(zhengwei): linux kernel >= 3.9
        udp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
    except Exception:
        LOG.warning(_LW("System does not support socket.SO_REUSEPORT "
                        "option. Only one worker will be able to process "
                        "incoming data."))
    udp.bind((self.conf.collector.udp_address,
              self.conf.collector.udp_port))

    self.udp_run = True
    while self.udp_run:
        # NOTE(sileht): return every 10 seconds to allow
        # clear shutdown
        if not select.select([udp], [], [], 10.0)[0]:
            continue
        # NOTE(jd) Arbitrary limit of 64K because that ought to be
        # enough for anybody.
        data, source = udp.recvfrom(64 * units.Ki)
        try:
            sample = msgpack.loads(data, encoding='utf-8')
        except Exception:
            LOG.warning(_("UDP: Cannot decode data sent by %s"), source)
        else:
            if publisher_utils.verify_signature(
                    sample, self.conf.publisher.telemetry_secret):
                try:
                    LOG.debug("UDP: Storing %s", sample)
                    self.meter_manager.map_method(
                        'record_metering_data', sample)
                except Exception:
                    LOG.exception(_("UDP: Unable to store meter"))
            else:
                LOG.warning(_LW('sample signature invalid, '
                                'discarding: %s'), sample)
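# Companion sketch: what a matching sender for this UDP collector loop
# could look like. The payload is just a msgpack-encoded sample dict; the
# field values and the port below are illustrative. A real deployment also
# attaches the HMAC signature that verify_signature() checks, which is
# omitted here.
import socket

import msgpack

sample = {'counter_name': 'cpu_util',
          'counter_volume': 0.5,
          'resource_id': 'inst-1',
          'timestamp': '2016-05-09T10:00:00'}
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.sendto(msgpack.dumps(sample), ('127.0.0.1', 4952))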
def verify_and_record_events(self, events):
    """Verify event signature and record them."""
    goods = []
    for event in events:
        if utils.verify_signature(
                event, self.conf.publisher.telemetry_secret):
            goods.append(event)
        else:
            LOG.warning(_LW(
                'event signature invalid, discarding event: %s'), event)
    return self.record_events(goods)
def __call__(self, *args, **kwargs):
    result = super(DeduplicatedCfgList, self).__call__(*args, **kwargs)
    result_set = set(result)
    if len(result) != len(result_set):
        LOG.warning(_LW("Duplicated values: %s found in CLI options, "
                        "auto de-duplicated"), result)
        result = list(result_set)
    if self.choices and not (result_set <= set(self.choices)):
        # ValueError lets oslo.config report this as an invalid option
        # value rather than an unhandled error.
        raise ValueError('Valid values are %s, but found %s'
                         % (self.choices, result))
    return result
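# Hypothetical usage sketch, assuming DeduplicatedCfgList subclasses
# oslo.config's types.List and that its constructor accepts the `choices`
# list referenced above. Because de-duplication goes through set(), the
# order of the returned list is unspecified, hence the sorted() compare.
polling_namespaces = DeduplicatedCfgList(
    choices=['compute', 'central', 'ipmi'])
result = polling_namespaces('compute,compute,central')  # logs a warning
assert sorted(result) == ['central', 'compute']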
def start_udp(self): address_family = socket.AF_INET if netutils.is_valid_ipv6(self.conf.collector.udp_address): address_family = socket.AF_INET6 udp = socket.socket(address_family, socket.SOCK_DGRAM) udp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) try: # NOTE(zhengwei): linux kernel >= 3.9 udp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) except Exception: LOG.warning(_LW("System does not support socket.SO_REUSEPORT " "option. Only one worker will be able to process " "incoming data.")) udp.bind((self.conf.collector.udp_address, self.conf.collector.udp_port)) self.udp_run = True while self.udp_run: # NOTE(sileht): return every 10 seconds to allow # clear shutdown if not select.select([udp], [], [], 10.0)[0]: continue # NOTE(jd) Arbitrary limit of 64K because that ought to be # enough for anybody. data, source = udp.recvfrom(64 * units.Ki) try: sample = msgpack.loads(data, encoding='utf-8') except Exception: LOG.warning(_("UDP: Cannot decode data sent by %s"), source) else: if publisher_utils.verify_signature( sample, self.conf.publisher.telemetry_secret): try: LOG.debug("UDP: Storing %s", sample) self.meter_manager.map_method( 'record_metering_data', sample) except Exception: LOG.exception(_("UDP: Unable to store meter")) else: LOG.warning(_LW('sample signature invalid, ' 'discarding: %s'), sample)