def test_mongo2(aggregator, check, instance_user):
    """Run the mongo check and verify service-check tags and metric values.

    Asserts that every submitted service check carries host/port/db tags and
    that every metric with a registered validator satisfies it.
    """
    # Run the check against our running server
    check.check(instance_user)

    # Service checks: at least one check type was submitted, and the first
    # type was submitted exactly once.
    service_checks = list(itervalues(aggregator._service_checks))
    assert len(service_checks) > 0
    assert len(service_checks[0]) == 1

    # Assert that all service checks have the proper tags: host and port
    for sc in service_checks[0]:
        assert to_string('host:{}'.format(common.HOST)) in sc.tags
        assert (
            to_string('port:{}'.format(common.PORT1)) in sc.tags
            or to_string('port:{}'.format(common.PORT2)) in sc.tags
        )
        assert 'db:test' in sc.tags

    # Metric assertions: each known metric's value must pass its validator.
    # (The original asserted both `metrics` and `len(metrics) > 0`; one check suffices.)
    metrics = list(itervalues(aggregator._metrics))
    assert metrics
    for m in metrics:
        metric = m[0]
        metric_name = metric.name
        if metric_name in METRIC_VAL_CHECKS:
            assert METRIC_VAL_CHECKS[metric_name](metric.value)
def get_pcf_channel_metrics(self, queue_manager):
    """Inquire all channels via PCF, submit the channel count and per-channel
    metrics, then run channel status checks."""
    inquiry_args = {pymqi.CMQCFC.MQCACH_CHANNEL_NAME: pymqi.ensure_bytes('*')}
    try:
        executor = pymqi.PCFExecute(queue_manager, convert=self.config.convert_endianness)
        channel_infos = executor.MQCMD_INQUIRE_CHANNEL(inquiry_args)
    except pymqi.MQMIError as e:
        self.log.warning("Error getting CHANNEL stats %s", e)
    else:
        # One gauge for the total number of channels seen.
        self.gauge(
            '{}.channel.channels'.format(metrics.METRIC_PREFIX),
            len(channel_infos),
            tags=self.config.tags_no_channel,
        )
        for info in channel_infos:
            name = to_string(info[pymqi.CMQCFC.MQCACH_CHANNEL_NAME]).strip()
            per_channel_tags = self.config.tags_no_channel + ["channel:{}".format(name)]
            self._submit_metrics_from_properties(info, metrics.channel_metrics(), per_channel_tags)

    # Specifically-configured channels are checked first: a channel that is not
    # discoverable can still be checked explicitly, and this way its metrics and
    # `ibm_mq.channel` service checks are sent together. The end result is the
    # same in any order.
    for channel in self.config.channels:
        self._submit_channel_status(queue_manager, channel, self.config.tags_no_channel)

    # Then sweep every discoverable channel.
    self._submit_channel_status(queue_manager, '*', self.config.tags_no_channel)
def _create_event(self, status, tags=None):
    """Build an event payload describing the reported cluster health status.

    "red" maps to an error alert, "yellow" to a warning; anything else is
    treated as a recovery (green) and produces a success alert.
    """
    hostname = to_string(self.hostname)
    alert_type = {"red": "error", "yellow": "warning"}.get(status, "success")
    if alert_type == "success":
        # Green (or any unrecognized status) is reported as a recovery.
        msg_title = "{} recovered as {}".format(hostname, status)
    else:
        msg_title = "{} is {}".format(hostname, status)

    msg = "ElasticSearch: {} just reported as {}".format(hostname, status)

    return {
        'timestamp': int(time.time()),
        'event_type': 'elasticsearch',
        'host': hostname,
        'msg_text': msg,
        'msg_title': msg_title,
        'alert_type': alert_type,
        'source_type_name': "elasticsearch",
        'event_object': hostname,
        'tags': tags,
    }
def _process_service_check(self, data, custom_tags=None):
    """Report a service check, tagged by the service and the backend.

    Statuses are defined in `STATUS_TO_SERVICE_CHECK` mapping.
    """
    extra_tags = custom_tags if custom_tags is not None else []
    service_name = data['pxname']
    raw_status = data['status']
    haproxy_hostname = to_string(self.hostname)
    check_hostname = haproxy_hostname if self.tag_service_check_by_host else ''

    # Skip services excluded by the user's filters.
    if self._is_service_excl_filtered(service_name):
        return
    # Unknown statuses have no service-check mapping; nothing to report.
    if raw_status not in Services.STATUS_TO_SERVICE_CHECK:
        return

    service_check_tags = ["haproxy_service:%s" % service_name]
    service_check_tags.extend(extra_tags)
    self._handle_legacy_service_tag(service_check_tags, service_name)

    backend_hostname = data['svname']
    if data['back_or_front'] == Services.BACKEND:
        service_check_tags.append('backend:%s' % backend_hostname)

    status = Services.STATUS_TO_SERVICE_CHECK[raw_status]
    message = "%s reported %s:%s %s" % (haproxy_hostname, service_name, backend_hostname, status)
    self.service_check(
        self.SERVICE_CHECK_NAME, status, message=message, hostname=check_hostname, tags=service_check_tags
    )
def sanitize_strings(s):
    """
    Sanitize strings from pymqi responses
    """
    # pymqi pads fixed-width fields with NUL bytes: keep only what precedes
    # the first NUL, then trim surrounding whitespace.
    return to_string(s).split('\x00', 1)[0].strip()
def test_external_host_tags(aggregator, realtime_instance):
    """Verify submitted external host tags match the fixture, with and without exclusions."""
    realtime_instance['collect_tags'] = True
    check = VSphereCheck('vsphere', {}, [realtime_instance])
    config = VSphereConfig(realtime_instance, MagicMock())
    check.api = MockedAPI(config)
    check.api_rest = VSphereRestAPI(config, MagicMock())

    with check.tags_cache.update():
        check.refresh_tags_cache()
    with check.infrastructure_cache.update():
        check.refresh_infrastructure_cache()

    fixture_file = os.path.join(HERE, 'fixtures', 'host_tags_values.json')
    with open(fixture_file, 'r') as f:
        expected_tags = json.load(f)

    def _submitted_sorted():
        # Capture what submit_external_host_tags() hands to set_external_tags,
        # sorted by hostname for a stable comparison.
        check.set_external_tags = MagicMock()
        check.submit_external_host_tags()
        submitted = check.set_external_tags.mock_calls[0].args[0]
        submitted.sort(key=lambda entry: entry[0])
        return submitted

    # First pass: every vsphere tag from the fixture must be submitted.
    for expected, submitted in zip(expected_tags, _submitted_sorted()):
        # json library loads data in unicode, let's convert back to native
        assert expected[0] == submitted[0]
        assert [to_string(t) for t in expected[1]['vsphere']] == submitted[1]['vsphere']

    # Second pass: excluded host tags must be filtered out of the submission.
    check.config.excluded_host_tags = ['vsphere_host']
    for expected, submitted in zip(expected_tags, _submitted_sorted()):
        assert expected[0] == submitted[0]
        filtered = [to_string(t) for t in expected[1]['vsphere'] if 'vsphere_host:' not in t]
        assert filtered == submitted[1]['vsphere']

    check.set_external_tags = MagicMock()
    check.submit_external_host_tags()
def dd_environment():
    """E2E fixture: stand up the terraform environment and yield a linkerd check instance."""
    terraform_dir = os.path.join(get_here(), 'terraform')
    with terraform_run(terraform_dir) as outputs:
        kubeconfig = to_string(outputs['kubeconfig']['value'])
        # Forward the linkerd-controller admin port locally so the check can scrape it.
        with port_forward(kubeconfig, 'linkerd', 'linkerd-controller', 4191) as (ip, port):
            yield {
                'prometheus_url': 'http://{}:{}/metrics'.format(ip, port),
                'metrics': [LINKERD_FIXTURE_METRICS],
                'type_overrides': LINKERD_FIXTURE_TYPES,
            }
def test_encoding(self, aggregator, msg_text):
    """Events with non-ASCII text and tags must round-trip through the aggregator."""
    check = AgentCheck()
    payload = dict(
        event_type='new.event',
        msg_title='new test event',
        aggregation_key='test.event',
        msg_text=msg_text,
        tags=['∆', u'Ω-bar'],
        timestamp=1,
    )
    check.event(payload)
    aggregator.assert_event(to_string(msg_text), tags=['∆', 'Ω-bar'])
def _submit_channel_status(self, queue_manager, search_channel_name, tags, channels_to_skip=None):
    """Submit channel status

    Note: Error 3065 (MQRCCF_CHL_STATUS_NOT_FOUND) might indicate that the channel has not been used.
    More info: https://www.ibm.com/support/knowledgecenter/SSFKSJ_7.1.0/com.ibm.mq.doc/fm16690_.htm

    :param search_channel_name might contain wildcard characters
    """
    channels_to_skip = channels_to_skip or []
    search_channel_tags = tags + ["channel:{}".format(search_channel_name)]
    # Track the PCF handle outside the try so `finally` can always disconnect it.
    pcf = None
    try:
        args = {pymqi.CMQCFC.MQCACH_CHANNEL_NAME: pymqi.ensure_bytes(search_channel_name)}
        pcf = pymqi.PCFExecute(
            queue_manager, response_wait_interval=self.config.timeout, convert=self.config.convert_endianness
        )
        response = pcf.MQCMD_INQUIRE_CHANNEL_STATUS(args)
        # The inquiry succeeded: report OK for the searched channel (or pattern).
        self.service_check(
            self.CHANNEL_SERVICE_CHECK, AgentCheck.OK, search_channel_tags, hostname=self.config.hostname
        )
    except pymqi.MQMIError as e:
        # Map specific MQ failure reasons onto service-check statuses.
        if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQCFC.MQRCCF_CHL_STATUS_NOT_FOUND:
            # Status not found — possibly a channel that has never been used (see docstring note).
            self.service_check(
                self.CHANNEL_SERVICE_CHECK, AgentCheck.CRITICAL, search_channel_tags, hostname=self.config.hostname
            )
            self.log.debug("Channel status not found for channel %s: %s", search_channel_name, e)
        elif e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
            # No reply message available — we cannot tell whether the channel is healthy.
            self.service_check(
                self.CHANNEL_SERVICE_CHECK, AgentCheck.UNKNOWN, search_channel_tags, hostname=self.config.hostname
            )
            self.log.debug("There are no messages available for channel %s", search_channel_name)
        else:
            # Any other MQ error is treated as a hard failure.
            self.service_check(
                self.CHANNEL_SERVICE_CHECK, AgentCheck.CRITICAL, search_channel_tags, hostname=self.config.hostname
            )
            self.log.warning("Error getting CHANNEL status for channel %s: %s", search_channel_name, e)
    else:
        # One response entry per channel matched by the (possibly wildcard) pattern.
        for channel_info in response:
            channel_name = to_string(channel_info[pymqi.CMQCFC.MQCACH_CHANNEL_NAME]).strip()
            if channel_name in channels_to_skip:
                continue
            channel_tags = tags + ["channel:{}".format(channel_name)]
            self._submit_metrics_from_properties(
                channel_info, channel_name, metrics.channel_status_metrics(), channel_tags
            )
            channel_status = channel_info[pymqi.CMQCFC.MQIACH_CHANNEL_STATUS]
            self._submit_channel_count(channel_name, channel_status, channel_tags)
            self._submit_status_check(channel_name, channel_status, channel_tags)
    finally:
        # Always release the PCF connection (and its internal reply queue).
        if pcf is not None:
            pcf.disconnect()
def get_tags_recursively(mor, infrastructure_data, config, include_only=None):
    # type: (vim.ManagedEntity, InfrastructureData, VSphereConfig, Optional[List[str]]) -> List[str]
    """Collect vsphere_* tags for `mor` and all of its ancestors.

    A host running a VM is not considered to be a parent of that VM.
    When `include_only` is given, only tags starting with one of those
    prefixes are returned (filter applied at the top-level call only).
    """
    props = infrastructure_data.get(mor, {})
    name = to_string(props.get('name', 'unknown'))
    tags = []
    if isinstance(mor, vim.HostSystem):
        tags.append('vsphere_host:{}'.format(name))
    elif isinstance(mor, vim.Folder):
        if isinstance(mor, vim.StoragePod):
            tags.append('vsphere_datastore_cluster:{}'.format(name))
            # Legacy mode: also keep tagging the storage pod as a "folder".
            if config.include_datastore_cluster_folder_tag:
                tags.append('vsphere_folder:{}'.format(name))
        else:
            tags.append('vsphere_folder:{}'.format(name))
    elif isinstance(mor, vim.ComputeResource):
        if isinstance(mor, vim.ClusterComputeResource):
            tags.append('vsphere_cluster:{}'.format(name))
        tags.append('vsphere_compute:{}'.format(name))
    elif isinstance(mor, vim.Datacenter):
        tags.append('vsphere_datacenter:{}'.format(name))
    elif isinstance(mor, vim.Datastore):
        tags.append('vsphere_datastore:{}'.format(name))

    parent = props.get('parent')
    if parent is None:
        # NOTE: preserved behavior — when there is no parent, the include_only
        # filter below is not applied.
        return tags
    tags.extend(get_tags_recursively(parent, infrastructure_data, config))

    if not include_only:
        return tags
    # Keep only tags matching one of the requested prefixes (one entry per
    # matching prefix, preserving original append order).
    return [tag for tag in tags for prefix in include_only if tag.startswith(prefix + ":")]
def _discover_queues(self, queue_manager, mq_pattern_filter):
    """Return the names of queues matching the pattern, across all supported queue types."""
    discovered = []
    for queue_type in SUPPORTED_QUEUE_TYPES:
        args = {
            pymqi.CMQC.MQCA_Q_NAME: pymqi.ensure_bytes(mq_pattern_filter),
            pymqi.CMQC.MQIA_Q_TYPE: queue_type,
        }
        try:
            pcf = pymqi.PCFExecute(queue_manager, convert=self.config.convert_endianness)
            response = pcf.MQCMD_INQUIRE_Q(args)
        except pymqi.MQMIError as e:
            self.warning("Error discovering queue: %s", e)
        else:
            discovered.extend(to_string(info[pymqi.CMQC.MQCA_Q_NAME]).strip() for info in response)
    return discovered
def get_parent_tags_recursively(mor, infrastructure_data, config):
    # type: (vim.ManagedEntity, InfrastructureData, VSphereConfig) -> List[str]
    """Walk up the parent chain of `mor` and collect vsphere_* tags for each ancestor.

    A host running a VM is deliberately NOT considered a parent of that VM:
    rootFolder(vim.Folder):
      - vm(vim.Folder): VM1-1, VM1-2
      - host(vim.Folder): HOST1, HOST2
    """
    parent = infrastructure_data[mor].get('parent')
    if not parent:
        return []

    parent_name = to_string(infrastructure_data.get(parent, {}).get('name', 'unknown'))
    own_tags = []
    if isinstance(parent, vim.HostSystem):
        own_tags.append('vsphere_host:{}'.format(parent_name))
    elif isinstance(parent, vim.Folder):
        if isinstance(parent, vim.StoragePod):
            own_tags.append('vsphere_datastore_cluster:{}'.format(parent_name))
            # Legacy mode: also keep tagging the storage pod as a "folder".
            if config.include_datastore_cluster_folder_tag:
                own_tags.append('vsphere_folder:{}'.format(parent_name))
        else:
            own_tags.append('vsphere_folder:{}'.format(parent_name))
    elif isinstance(parent, vim.ComputeResource):
        if isinstance(parent, vim.ClusterComputeResource):
            own_tags.append('vsphere_cluster:{}'.format(parent_name))
        own_tags.append('vsphere_compute:{}'.format(parent_name))
    elif isinstance(parent, vim.Datacenter):
        own_tags.append('vsphere_datacenter:{}'.format(parent_name))
    elif isinstance(parent, vim.Datastore):
        own_tags.append('vsphere_datastore:{}'.format(parent_name))

    # Ancestor tags come first, so the list reads from the hierarchy root down.
    result = get_parent_tags_recursively(parent, infrastructure_data, config)
    result.extend(own_tags)
    return result
def _discover_queues(self, queue_manager, mq_pattern_filter):
    """Return queue names matching the pattern for every supported queue type,
    staying quiet when MQ simply reports that nothing matched."""
    queues = []
    for queue_type in SUPPORTED_QUEUE_TYPES:
        args = {
            pymqi.CMQC.MQCA_Q_NAME: pymqi.ensure_bytes(mq_pattern_filter),
            pymqi.CMQC.MQIA_Q_TYPE: queue_type,
        }
        try:
            pcf = pymqi.PCFExecute(queue_manager, convert=self.config.convert_endianness)
            response = pcf.MQCMD_INQUIRE_Q(args)
        except pymqi.MQMIError as e:
            # Don't warn if no messages, see:
            # https://github.com/dsuch/pymqi/blob/v1.12.0/docs/examples.rst#how-to-wait-for-multiple-messages
            no_messages = e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE
            if not no_messages:
                self.warning("Error discovering queue: %s", e)
        else:
            for queue_info in response:
                queues.append(to_string(queue_info[pymqi.CMQC.MQCA_Q_NAME]).strip())
    return queues
def _discover_queues(self, queue_manager, mq_pattern_filter):
    # type: (pymqi.QueueManager, str) -> List[str]
    """Discover queue names matching `mq_pattern_filter` for every supported queue type.

    Benign "nothing matched" MQ errors are logged at debug level only; any other
    inquiry failure produces a check warning. A warning is also emitted when no
    queue at all was discovered.
    """
    queues = []
    for queue_type in SUPPORTED_QUEUE_TYPES:
        args = {pymqi.CMQC.MQCA_Q_NAME: pymqi.ensure_bytes(mq_pattern_filter), pymqi.CMQC.MQIA_Q_TYPE: queue_type}
        # Track the PCF handle outside the try so `finally` can always disconnect it.
        pcf = None
        try:
            pcf = pymqi.PCFExecute(
                queue_manager, response_wait_interval=self.config.timeout, convert=self.config.convert_endianness
            )
            response = pcf.MQCMD_INQUIRE_Q(args)
        except pymqi.MQMIError as e:
            # Don't warn if no messages, see:
            # https://github.com/dsuch/pymqi/blob/v1.12.0/docs/examples.rst#how-to-wait-for-multiple-messages
            if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
                self.log.debug("No queue info available")
            elif e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_UNKNOWN_OBJECT_NAME:
                self.log.debug("No matching queue of type %d for pattern %s", queue_type, mq_pattern_filter)
            else:
                self.warning("Error discovering queue: %s", e)
        else:
            for queue_info in response:
                queue = queue_info.get(pymqi.CMQC.MQCA_Q_NAME, None)
                if queue:
                    queue_name = to_string(queue).strip()
                    self.log.debug("Discovered queue: %s", queue_name)
                    queues.append(queue_name)
                else:
                    # Defensive: skip malformed entries without a queue name.
                    self.log.debug('Discovered queue with empty name, skipping.')
                    continue
            self.log.debug("%s queues discovered", str(len(queues)))
        finally:
            # Close internal reply queue to prevent filling up a dead-letter queue.
            # https://github.com/dsuch/pymqi/blob/084ab0b2638f9d27303a2844badc76635c4ad6de/code/pymqi/__init__.py#L2892-L2902
            # https://dsuch.github.io/pymqi/examples.html#how-to-specify-dynamic-reply-to-queues
            if pcf is not None:
                pcf.disconnect()

    if not queues:
        self.warning("No matching queue of type MQQT_LOCAL or MQQT_REMOTE for pattern %s", mq_pattern_filter)

    return queues
def get_pcf_channel_metrics(self, queue_manager):
    """Discover channels and submit the channel count, per-channel metrics, and status checks."""
    discovered_channels = self._discover_channels(queue_manager)
    if discovered_channels:
        # One gauge for the total number of channels discovered.
        self.gauge(
            '{}.channel.channels'.format(metrics.METRIC_PREFIX),
            len(discovered_channels),
            tags=self.config.tags_no_channel,
            hostname=self.config.hostname,
        )
        for channel_info in discovered_channels:
            name = to_string(channel_info[pymqi.CMQCFC.MQCACH_CHANNEL_NAME]).strip()
            per_channel_tags = self.config.tags_no_channel + ["channel:{}".format(name)]
            self._submit_metrics_from_properties(channel_info, name, metrics.channel_metrics(), per_channel_tags)

    # Specifically-configured channels are checked first: a channel that is not
    # discoverable can still be checked explicitly, and this way its metrics and
    # `ibm_mq.channel` service checks are sent together. The end result is the
    # same in any order.
    for channel in self.config.channels:
        self._submit_channel_status(queue_manager, channel, self.config.tags_no_channel)

    # Then sweep all discoverable channels, when auto-discovery is enabled.
    if self.config.auto_discover_channels:
        self._submit_channel_status(queue_manager, '*', self.config.tags_no_channel)
def get_parent_tags_recursively(mor, infrastructure_data):
    """Go up the resources hierarchy from the given mor. Note that a host running a VM is not considered
    to be a parent of that VM.

    rootFolder(vim.Folder):
      - vm(vim.Folder): VM1-1, VM1-2
      - host(vim.Folder): HOST1, HOST2

    :param mor: managed object reference whose ancestors are walked
    :param infrastructure_data: mapping of mor -> properties dict (with 'name' and 'parent' keys)
    :return: list of vsphere_* tags for every ancestor, ordered from the hierarchy root down
    """
    # Fix: default to an empty dict so a mor missing from the cache returns []
    # instead of raising AttributeError on `.get('parent')` below (the sibling
    # lookup for `parent_props` already defaults this way).
    mor_props = infrastructure_data.get(mor, {})
    parent = mor_props.get('parent')
    if parent:
        tags = []
        parent_props = infrastructure_data.get(parent, {})
        parent_name = to_string(parent_props.get('name', 'unknown'))
        if isinstance(parent, vim.HostSystem):
            tags.append('vsphere_host:{}'.format(parent_name))
        elif isinstance(parent, vim.Folder):
            tags.append('vsphere_folder:{}'.format(parent_name))
        elif isinstance(parent, vim.ComputeResource):
            if isinstance(parent, vim.ClusterComputeResource):
                tags.append('vsphere_cluster:{}'.format(parent_name))
            tags.append('vsphere_compute:{}'.format(parent_name))
        elif isinstance(parent, vim.Datacenter):
            tags.append('vsphere_datacenter:{}'.format(parent_name))
        elif isinstance(parent, vim.Datastore):
            tags.append('vsphere_datastore:{}'.format(parent_name))

        # Ancestor tags come first so the resulting list reads root -> leaf.
        parent_tags = get_parent_tags_recursively(parent, infrastructure_data)
        parent_tags.extend(tags)
        return parent_tags
    return []
def _collect_metrics_async(self, instance, query_specs): """ Task that collects the metrics listed in the morlist for one MOR """ # ## <TEST-INSTRUMENTATION> t = Timer() # ## </TEST-INSTRUMENTATION> i_key = self._instance_key(instance) server_instance = self._get_server_instance(instance) perfManager = server_instance.content.perfManager results = perfManager.QueryPerf(query_specs) if results: for mor_perfs in results: mor_name = str(mor_perfs.entity) try: mor = self.mor_cache.get_mor(i_key, mor_name) except MorNotFoundError: self.log.error( "Trying to get metrics from object %s deleted from the cache, skipping. " "Consider increasing the parameter `clean_morlist_interval` to avoid that", mor_name, ) continue for result in mor_perfs.value: counter_id = result.id.counterId if not self.metadata_cache.contains(i_key, counter_id): self.log.debug( "Skipping value for counter %s, because there is no metadata about it", ensure_unicode(counter_id), ) continue # Metric types are absolute, delta, and rate metric_name = self.metadata_cache.get_metadata( i_key, result.id.counterId).get('name') if self.in_compatibility_mode(instance): if metric_name not in ALL_METRICS: self.log.debug("Skipping unknown `%s` metric.", ensure_unicode(metric_name)) continue if not result.value: self.log.debug( "Skipping `%s` metric because the value is empty", ensure_unicode(metric_name)) continue instance_name = result.id.instance or "none" value = self._transform_value(instance, result.id.counterId, result.value[0]) hostname = mor['hostname'] tags = [ 'instance:{}'.format(ensure_unicode(instance_name)) ] if not hostname: # no host tags available tags.extend(mor['tags']) else: hostname = to_string(hostname) tags.extend(instance.get('tags', [])) # vsphere "rates" should be submitted as gauges (rate is # precomputed). 
self.gauge("vsphere.{}".format( ensure_unicode(metric_name)), value, hostname=hostname, tags=tags) # ## <TEST-INSTRUMENTATION> custom_tags = instance.get('tags', []) + ['instance:{}'.format(i_key)] self.histogram('datadog.agent.vsphere.metric_colection.time', t.total(), tags=custom_tags)
def submit_metrics_callback(self, query_results):
    # type: (List[vim.PerformanceManager.EntityMetricBase]) -> None
    """
    Callback of the collection of metrics. This is run in the main thread!

    `query_results` currently contain results of one resource type in practice, but this function
    is generic and can handle results with mixed resource types.
    """

    # First pass: record which metrics have per-instance values.
    # `have_instance_value` is used later to avoid collecting aggregated metrics
    # when instance metrics are collected.
    have_instance_value = defaultdict(set)  # type: Dict[Type[vim.ManagedEntity], Set[MetricName]]
    for results_per_mor in query_results:
        resource_type = type(results_per_mor.entity)
        metadata = self.metrics_metadata_cache.get_metadata(resource_type)
        for result in results_per_mor.value:
            if result.id.instance:
                have_instance_value[resource_type].add(metadata[result.id.counterId])

    # Second pass: submit the metrics.
    for results_per_mor in query_results:
        mor_props = self.infrastructure_cache.get_mor_props(results_per_mor.entity)
        if mor_props is None:
            self.log.debug(
                "Skipping results for mor %s because the integration is not yet aware of it. If this is a problem"
                " you can increase the value of 'refresh_infrastructure_cache_interval'.",
                results_per_mor.entity,
            )
            continue
        self.log.debug(
            "Retrieved mor props for entity %s: %s",
            results_per_mor.entity,
            mor_props,
        )
        resource_type = type(results_per_mor.entity)
        metadata = self.metrics_metadata_cache.get_metadata(resource_type)
        for result in results_per_mor.value:
            metric_name = metadata.get(result.id.counterId)

            if self.log.isEnabledFor(logging.DEBUG):
                # Use isEnabledFor to avoid unnecessary processing
                self.log.debug(
                    "Processing metric `%s`: resource_type=`%s`, result=`%s`",
                    metric_name,
                    resource_type,
                    str(result).replace("\n", "\\n"),
                )

            if not metric_name:
                # Fail-safe
                self.log.debug(
                    "Skipping value for counter %s, because the integration doesn't have metadata about it. If this"
                    " is a problem you can increase the value of 'refresh_metrics_metadata_cache_interval'",
                    result.id.counterId,
                )
                continue

            if not result.value:
                self.log.debug("Skipping metric %s because the value is empty", to_string(metric_name))
                continue

            # Get the most recent value that isn't negative
            valid_values = [v for v in result.value if v >= 0]
            if not valid_values:
                self.log.debug(
                    "Skipping metric %s because the value returned by vCenter"
                    " is negative (i.e. the metric is not yet available). values: %s",
                    to_string(metric_name),
                    list(result.value),
                )
                continue

            tags = []
            if should_collect_per_instance_values(self.config, metric_name, resource_type) and (
                metric_name in have_instance_value[resource_type]
            ):
                instance_value = result.id.instance
                # When collecting per instance values, it's possible that both aggregated metric and per instance
                # metrics are received. In that case, the metric with no instance value is skipped.
                if not instance_value:
                    continue
                instance_tag_key = get_mapped_instance_tag(metric_name)
                tags.append('{}:{}'.format(instance_tag_key, instance_value))

            vsphere_tags = self.infrastructure_cache.get_mor_tags(results_per_mor.entity)
            mor_tags = mor_props['tags'] + vsphere_tags

            if resource_type in HISTORICAL_RESOURCES:
                # Tags are attached to the metrics
                tags.extend(mor_tags)
                hostname = None
            else:
                # Tags are (mostly) submitted as external host tags.
                hostname = to_string(mor_props.get('hostname'))
                if self.config.excluded_host_tags:
                    # Excluded host tags are attached to the metric directly instead.
                    tags.extend([t for t in mor_tags if t.split(":", 1)[0] in self.config.excluded_host_tags])

            tags.extend(self.config.base_tags)

            value = valid_values[-1]
            if metric_name in PERCENT_METRICS:
                # Convert the percentage to a float.
                value /= 100.0

            self.log.debug(
                "Submit metric: name=`%s`, value=`%s`, hostname=`%s`, tags=`%s`",
                metric_name,
                value,
                hostname,
                tags,
            )
            # vSphere "rates" should be submitted as gauges (rate is precomputed).
            self.gauge(to_string(metric_name), value, hostname=hostname, tags=tags)
def refresh_infrastructure_cache(self):
    # type: () -> None
    """Fetch the complete infrastructure, generate tags for each monitored resources and store all of that
    into the infrastructure_cache. It also computes the resource `hostname` property to be used when
    submitting metrics for this mor."""

    self.log.debug("Refreshing the infrastructure cache...")
    t0 = Timer()
    infrastructure_data = self.api.get_infrastructure()
    self.gauge(
        "datadog.vsphere.refresh_infrastructure_cache.time",
        t0.total(),
        tags=self.config.base_tags,
        raw=True,
        hostname=self._hostname,
    )
    self.log.debug("Infrastructure cache refreshed in %.3f seconds.", t0.total())
    self.log.debug("Infrastructure cache: %s", infrastructure_data)

    # Collect and store resource tags first so the filters below can use them.
    all_tags = {}
    if self.config.should_collect_tags:
        all_tags = self.collect_tags(infrastructure_data)
    self.infrastructure_cache.set_all_tags(all_tags)

    for mor, properties in iteritems(infrastructure_data):
        if not isinstance(mor, tuple(self.config.collected_resource_types)):
            # Do nothing for the resource types we do not collect
            continue
        if not is_resource_collected_by_filters(
            mor, infrastructure_data, self.config.resource_filters, self.infrastructure_cache.get_mor_tags(mor)
        ):
            # The resource does not match the specified whitelist/blacklist patterns.
            continue

        mor_name = to_string(properties.get("name", "unknown"))
        mor_type_str = MOR_TYPE_AS_STRING[type(mor)]
        hostname = None
        tags = []

        if isinstance(mor, vim.VirtualMachine):
            power_state = properties.get("runtime.powerState")
            if power_state != vim.VirtualMachinePowerState.poweredOn:
                # Skipping because the VM is not powered on
                # TODO: Sometimes VM are "poweredOn" but "disconnected" and thus have no metrics
                self.log.debug("Skipping VM %s in state %s", mor_name, to_string(power_state))
                continue

            # Hosts are not considered as parents of the VMs they run, we use the `runtime.host` property
            # to get the name of the ESXi host
            runtime_host = properties.get("runtime.host")
            runtime_host_props = infrastructure_data[runtime_host] if runtime_host else {}
            runtime_hostname = to_string(runtime_host_props.get("name", "unknown"))
            tags.append('vsphere_host:{}'.format(runtime_hostname))

            if self.config.use_guest_hostname:
                # Prefer the in-guest hostname when configured; fall back to the MOR name.
                hostname = properties.get("guest.hostName", mor_name)
            else:
                hostname = mor_name
        elif isinstance(mor, vim.HostSystem):
            hostname = mor_name
        else:
            # Non-host, non-VM resources are identified by a type-specific tag.
            tags.append('vsphere_{}:{}'.format(mor_type_str, mor_name))

        tags.extend(get_parent_tags_recursively(mor, infrastructure_data))
        tags.append('vsphere_type:{}'.format(mor_type_str))

        # Attach tags from fetched attributes.
        tags.extend(properties.get('attributes', []))

        mor_payload = {"tags": tags}  # type: Dict[str, Any]

        if hostname:
            mor_payload['hostname'] = hostname

        self.infrastructure_cache.set_mor_props(mor, mor_payload)
def format_metric_name(counter):
    """Build the dotted metric name `<group>.<name>.<short rollup>` for a perf counter."""
    group = to_string(counter.groupInfo.key)
    name = to_string(counter.nameInfo.key)
    rollup = SHORT_ROLLUP[str(counter.rollupType)]
    return "{}.{}.{}".format(group, name, rollup)
def format_metric_name(counter):
    # type: (vim.PerformanceManager.PerfCounterInfo) -> MetricName
    """Return the dotted metric name for a perf counter: group, name, then the short rollup suffix."""
    parts = [
        to_string(counter.groupInfo.key),
        to_string(counter.nameInfo.key),
        SHORT_ROLLUP[str(counter.rollupType)],
    ]
    return "{}.{}.{}".format(*parts)