def assertHistogramMetric(self, metric_name, expected, buckets):
    # Note that Prometheus histograms are cumulative so we must sum up the successive bucket values
    # https://en.wikipedia.org/wiki/Histogram#Cumulative_histogram
    metric = self.provider._metrics[metric_name]
    [collected] = metric.collect()
    sample_name = f'{metric_name}_bucket'
    expected_samples = []
    for key, value in expected.items():
        cumulative_value = 0
        for bucket in buckets:
            val = value.get(bucket, 0)
            cumulative_value += val
            labels = dict(key + (('le', str(float(bucket))),))
            expected_samples.append(
                Sample(sample_name, labels, float(cumulative_value), None, None))
        labels = dict(key + (('le', '+Inf'),))
        cumulative_value += value.get(INF, 0)
        expected_samples.append(
            Sample(sample_name, labels, float(cumulative_value), None, None))
    actual = [s for s in collected.samples if s.name.endswith('bucket')]
    self.assertListEqual(actual, expected_samples)
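# Illustrative sketch (not from the original test suite): the assumed shape of the
# `expected` and `buckets` arguments to assertHistogramMetric, and how the cumulative
# bucket values are derived. All names and numbers below are hypothetical.
buckets = [0.1, 0.5, 1.0]
# keys are tuples of label pairs; values map a bucket's upper bound to the number of
# observations that fell into that (non-cumulative) bucket
expected = {
    (('operation', 'read'),): {0.1: 2, 0.5: 1},
}
cumulative = 0
for bound in buckets:
    cumulative += expected[(('operation', 'read'),)].get(bound, 0)
    print(bound, cumulative)  # 0.1 -> 2, 0.5 -> 3, 1.0 -> 3 (cumulative histogram)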
def test_get_conf_file(self, get_status_replication_tasks):
    expected_response_sample = [
        Sample("aws_dms_replication_task_status", {"replication_task_id": "test1"}, 0),
        Sample("aws_dms_replication_task_status", {"replication_task_id": "test2"}, 1),
        Sample("aws_dms_replication_task_status", {"replication_task_id": "test3"}, 2),
        Sample("aws_dms_replication_task_status", {"replication_task_id": "test4"}, 3),
        Sample("aws_dms_replication_task_status", {"replication_task_id": "test5"}, 4),
        Sample("aws_dms_replication_task_status", {"replication_task_id": "test6"}, 5),
        Sample("aws_dms_replication_task_status", {"replication_task_id": "test7"}, 6),
        Sample("aws_dms_replication_task_status", {"replication_task_id": "test8"}, 7),
        Sample("aws_dms_replication_task_status", {"replication_task_id": "test9"}, 8),
    ]
    replication_task = AwsDmsReplicationTaskStatusCollector()
    result = next(replication_task.collect())
    assert result.samples == expected_response_sample
def map_ha_cluster_pacemaker_nodes_status(sample):
    labels = sample.labels
    labels["status"] = nodestatus_from_rhel(sample.name)
    labels["node"] = labels["instname"]
    labels["type"] = "member"
    newsample = Sample("ha_cluster_pacemaker_nodes", labels, sample.value, sample.timestamp)
    return newsample
def nameify_sample(sample):
    """
    If we get a prometheus_client<0.4.0 sample as a tuple, transform it into a
    namedtuple which has the names we expect.
    """
    if not isinstance(sample, Sample):
        sample = Sample(*sample, None, None)
    return sample
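# Minimal usage sketch (assumption, not from the source): prometheus_client releases
# before 0.4.0 yielded plain (name, labels, value) tuples, which nameify_sample pads
# out into the five-field Sample namedtuple used by the rest of this code.
from prometheus_client.samples import Sample

legacy_sample = ('http_requests_total', {'method': 'GET'}, 42.0)  # hypothetical pre-0.4 tuple
named = nameify_sample(legacy_sample)
assert named == Sample('http_requests_total', {'method': 'GET'}, 42.0, None, None)
assert nameify_sample(named) is named  # already a Sample: returned unchanged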
def test_collection():
    d = mock.create_autospec(pyudev.Device)
    t = mock.create_autospec(temper.usb_temper)
    t.phy.return_value = ':phy:'
    t.version = 'VERSIONSTRING___'
    t.read_sensor.return_value = [
        ('temp', 'foo', 22),
        ('humid', 'bar', 45),
    ]
    c = Collector()
    c._Collector__sensors = {d: t}
    fams = list(c.collect())
    assert fams[0].name == 'temper_temperature_celsius'
    assert fams[0].type == 'gauge'
    assert fams[0].samples == [
        Sample(name='temper_temperature_celsius',
               labels={
                   'name': 'foo',
                   'phy': ':phy:',
                   'version': 'VERSIONSTRING___'
               },
               value=22,
               timestamp=None,
               exemplar=None)
    ]
    assert fams[1].name == 'temper_humidity_rh'
    assert fams[1].type == 'gauge'
    assert fams[1].samples == [
        Sample(name='temper_humidity_rh',
               labels={
                   'name': 'bar',
                   'phy': ':phy:',
                   'version': 'VERSIONSTRING___'
               },
               value=45,
               timestamp=None,
               exemplar=None)
    ]
    assert c.healthy()
def map_ha_cluster_pacemaker_fail_migration(sample):
    labels = sample.labels
    parts = labels["instname"].split(':', 2)
    if len(parts) == 2:
        labels["resource"] = parts[1]
        labels["node"] = parts[0]
    else:
        labels["resource"] = labels["instname"]
        labels["node"] = labels["hostname"]
    newsample = Sample(sample.name, labels, sample.value, sample.timestamp)
    return newsample
def map_ha_cluster_pacemaker_resources_managed(sample):
    labels = sample.labels
    labels["managed"] = "True"
    parts = labels["instname"].split(':', 2)
    if len(parts) == 2:
        labels["resource"] = parts[0]
        labels["node"] = parts[1]
    else:
        labels["resource"] = labels["instname"]
        labels["node"] = labels["hostname"]
    newsample = Sample("ha_cluster_pacemaker_resources", labels, sample.value, sample.timestamp)
    return newsample
def map_ha_cluster_pacemaker_resources(sample):
    labels = sample.labels
    labels["status"] = sample.name[len("ha_cluster_pacemaker_resources_status_"):]
    # look for node name after colon
    parts = labels["instname"].split(':', 2)
    if len(parts) == 2:
        labels["resource"] = parts[0]
        labels["node"] = parts[1]
    else:
        labels["resource"] = labels["instname"]
        labels["node"] = labels["hostname"]
    newsample = Sample("ha_cluster_pacemaker_resources", labels, sample.value, sample.timestamp)
    return newsample
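# Hedged example (labels and values are made up): given a RHEL-style exporter sample,
# map_ha_cluster_pacemaker_resources moves the status from the metric name into a label
# and splits the "resource:node" instname into separate resource/node labels.
from prometheus_client.samples import Sample

rhel_sample = Sample('ha_cluster_pacemaker_resources_status_active',
                     {'instname': 'rsc_SAPHana_HDB:node1', 'hostname': 'node1'},
                     1.0, None, None)
suse_sample = map_ha_cluster_pacemaker_resources(rhel_sample)
assert suse_sample.name == 'ha_cluster_pacemaker_resources'
assert suse_sample.labels['status'] == 'active'
assert suse_sample.labels['resource'] == 'rsc_SAPHana_HDB'
assert suse_sample.labels['node'] == 'node1'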
def map_ha_cluster_pacemaker_resources_all(sample):
    startlowercase = lambda s: s[:1].lower() + s[1:] if s else ''
    labels = sample.labels
    if labels["managed"] == "1":
        labels["managed"] = "true"
    if labels["managed"] == "0":
        labels["managed"] = "false"
    newrole = labels["role"]
    if len(newrole) > 0:
        labels["role"] = startlowercase(newrole)
    newsample = Sample("ha_cluster_pacemaker_resources", labels, sample.value, sample.timestamp)
    return newsample
def generateJsonString(self) -> str:
    # The correlation_id can be used to group fields from the same metrics call
    correlation_id = str(uuid.uuid4())
    fallback_datetime = datetime.now(timezone.utc)

    def prometheusSample2Dict(sample):
        """
        Convert a prometheus metric sample to a Python dictionary for serialization
        """
        TimeGenerated = fallback_datetime
        if sample.timestamp:
            TimeGenerated = datetime.fromtimestamp(sample.timestamp, tz=timezone.utc)
        sample_dict = {
            "name": sample.name,
            "labels": json.dumps(sample.labels, separators=(',', ':'), sort_keys=True, cls=JsonEncoder),
            "value": sample.value,
            self.colTimeGenerated: TimeGenerated,
            "instance": self.providerInstance.instance,
            "metadata": self.providerInstance.metadata,
            "correlation_id": correlation_id
        }
        return sample_dict

    def filter_prometheus_sample(sample):
        """
        Filter out samples matching suppressIfZeroRegex with value == 0
        """
        if (suppressIfZeroRegex is not None and sample.value == 0
                and suppressIfZeroRegex.match(sample.name)):
            return False
        return True

    def filter_prometheus_metric(metric):
        """
        Filter out names based on our exclude and include lists
        """
        # Remove everything matching excludeRegex
        if self.excludeRegex.match(metric.name):
            return False
        # If includeRegex is defined, filter out everything NOT matching
        if includeRegex is not None and includeRegex.match(metric.name) is None:
            return False
        # If none of the above matched, just let the item through
        return True

    prometheusMetricsText = self.lastResult[0]
    includeRegex = self.lastResult[1]
    suppressIfZeroRegex = self.lastResult[2]
    resultSet = list()
    self.tracer.info("[%s] converting result set into JSON" % self.fullName)
    try:
        if not prometheusMetricsText:
            raise ValueError("Empty result from prometheus instance %s" %
                             self.providerInstance.instance)
        for family in filter(filter_prometheus_metric,
                             text_string_to_metric_families(prometheusMetricsText)):
            resultSet.extend(
                map(prometheusSample2Dict,
                    filter(filter_prometheus_sample, family.samples)))
    except ValueError as e:
        self.tracer.error("[%s] Could not parse prometheus metrics (%s): %s" %
                          (self.fullName, e, prometheusMetricsText))
        resultSet.append(prometheusSample2Dict(Sample("up", dict(), 0)))
    else:
        # The up-metric is used to determine whether valid data could be read from
        # the prometheus endpoint and is used by prometheus in a similar way
        resultSet.append(prometheusSample2Dict(Sample("up", dict(), 1)))
    resultSet.append(
        prometheusSample2Dict(
            Sample(
                "sapmon", {
                    "SAPMON_VERSION": const.PAYLOAD_VERSION,
                    "PROVIDER_INSTANCE": self.providerInstance.name
                }, 1)))

    # Convert temporary dictionary into JSON string
    try:
        # Use a very compact json representation to limit amount of data parsed by LA
        resultJsonString = json.dumps(resultSet, sort_keys=True, separators=(',', ':'),
                                      cls=JsonEncoder)
        self.tracer.debug("[%s] resultJson=%s" % (self.fullName, str(resultJsonString)[:1000]))
    except Exception as e:
        self.tracer.error("[%s] could not format logItem=%s into JSON (%s)" %
                          (self.fullName, resultSet[:50], e))
    return resultJsonString
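# Minimal parsing sketch showing the Sample objects that prometheusSample2Dict above
# receives from prometheus_client's text parser; the exposition text here is invented
# for illustration and is not part of the original source.
from prometheus_client.parser import text_string_to_metric_families

exposition = (
    '# TYPE up gauge\n'
    'up{instance="host:9100"} 1\n'
)
for family in text_string_to_metric_families(exposition):
    for sample in family.samples:
        print(sample.name, sample.labels, sample.value)  # up {'instance': 'host:9100'} 1.0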
def _decumulate_histogram_buckets(self, metric):
    """
    Decumulate buckets in a given histogram metric and add the lower_bound label
    (le being upper_bound)
    """
    bucket_values_by_context_upper_bound = {}
    for sample in metric.samples:
        if sample[self.SAMPLE_NAME].endswith("_bucket"):
            context_key = self._compute_bucket_hash(sample[self.SAMPLE_LABELS])
            if context_key not in bucket_values_by_context_upper_bound:
                bucket_values_by_context_upper_bound[context_key] = {}
            bucket_values_by_context_upper_bound[context_key][float(sample[self.SAMPLE_LABELS]["le"])] = sample[
                self.SAMPLE_VALUE
            ]

    sorted_buckets_by_context = {}
    for context in bucket_values_by_context_upper_bound:
        sorted_buckets_by_context[context] = sorted(bucket_values_by_context_upper_bound[context])

    # Tuples (lower_bound, upper_bound, value)
    bucket_tuples_by_context_upper_bound = {}
    for context in sorted_buckets_by_context:
        for i, upper_b in enumerate(sorted_buckets_by_context[context]):
            if i == 0:
                if context not in bucket_tuples_by_context_upper_bound:
                    bucket_tuples_by_context_upper_bound[context] = {}
                if upper_b > 0:
                    # positive buckets start at zero
                    bucket_tuples_by_context_upper_bound[context][upper_b] = (
                        0,
                        upper_b,
                        bucket_values_by_context_upper_bound[context][upper_b],
                    )
                else:
                    # negative buckets start at -inf
                    bucket_tuples_by_context_upper_bound[context][upper_b] = (
                        self.MINUS_INF,
                        upper_b,
                        bucket_values_by_context_upper_bound[context][upper_b],
                    )
                continue
            tmp = (
                bucket_values_by_context_upper_bound[context][upper_b]
                - bucket_values_by_context_upper_bound[context][sorted_buckets_by_context[context][i - 1]]
            )
            bucket_tuples_by_context_upper_bound[context][upper_b] = (
                sorted_buckets_by_context[context][i - 1],
                upper_b,
                tmp,
            )

    # modify original metric to inject lower_bound & modified value
    for i, sample in enumerate(metric.samples):
        if not sample[self.SAMPLE_NAME].endswith("_bucket"):
            continue
        context_key = self._compute_bucket_hash(sample[self.SAMPLE_LABELS])
        matching_bucket_tuple = bucket_tuples_by_context_upper_bound[context_key][
            float(sample[self.SAMPLE_LABELS]["le"])
        ]
        # Replacing the sample tuple
        sample[self.SAMPLE_LABELS]["lower_bound"] = str(matching_bucket_tuple[0])
        metric.samples[i] = Sample(sample[self.SAMPLE_NAME], sample[self.SAMPLE_LABELS], matching_bucket_tuple[2])
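# Worked example of the decumulation arithmetic above (numbers are made up): cumulative
# bucket counts become per-bucket counts by subtracting the count of the previous upper
# bound, with the first positive bucket starting at a lower bound of 0.
cumulative = {1.0: 3.0, 5.0: 7.0, float('inf'): 10.0}  # le -> cumulative count
bounds = sorted(cumulative)
decumulated = {}
for i, upper in enumerate(bounds):
    lower = 0.0 if i == 0 else bounds[i - 1]
    count = cumulative[upper] - (0.0 if i == 0 else cumulative[bounds[i - 1]])
    decumulated[(lower, upper)] = count
assert decumulated == {(0.0, 1.0): 3.0, (1.0, 5.0): 4.0, (5.0, float('inf')): 3.0}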
def _add_gauge_metric(metric, labels, value):
    metric.samples.append(Sample(metric.name, labels, value, None))
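# Usage sketch (metric name and labels are hypothetical): appending a raw Sample to a
# GaugeMetricFamily built inside a custom collector, as _add_gauge_metric does above.
from prometheus_client.core import GaugeMetricFamily

family = GaugeMetricFamily('queue_depth', 'Current queue depth', labels=['queue'])
_add_gauge_metric(family, {'queue': 'default'}, 12)
# family.samples now contains Sample('queue_depth', {'queue': 'default'}, 12, None)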
def generateJsonString(self) -> str:
    # The correlation_id can be used to group fields from the same metrics call
    correlation_id = str(uuid.uuid4())
    fallback_datetime = datetime.now(timezone.utc)

    def prometheusSample2Dict(sample):
        """
        Convert a prometheus metric sample to a Python dictionary for serialization
        """
        TimeGenerated = fallback_datetime
        if sample.timestamp:
            TimeGenerated = datetime.fromtimestamp(sample.timestamp, tz=timezone.utc)
        sample_dict = {
            "name": sample.name,
            "labels": json.dumps(sample.labels, separators=(',', ':'), sort_keys=True, cls=JsonEncoder),
            "value": sample.value,
            self.colTimeGenerated: TimeGenerated,
            "instance": self.providerInstance.instance,
            "metadata": self.providerInstance.metadata,
            "correlation_id": correlation_id
        }
        return sample_dict

    def filter_prometheus_sample(sample):
        """
        Filter out samples matching suppressIfZeroRegex with value == 0
        """
        if (suppressIfZeroRegex is not None and sample.value == 0
                and suppressIfZeroRegex.match(sample.name)):
            return False
        return True

    def filter_prometheus_metric(metric):
        """
        Filter out names based on our exclude and include lists
        """
        # Remove everything matching excludeRegex
        if self.excludeRegex.match(metric.name):
            return False
        # If includeRegex is defined, filter out everything NOT matching
        if includeRegex is not None and includeRegex.match(metric.name) is None:
            return False
        # If none of the above matched, just let the item through
        return True

    def nodestatus_from_rhel(samplename):
        # parse sample name to retrieve status
        newstatus = samplename[len("ha_cluster_pacemaker_nodes_status_"):]
        if newstatus == "on_fail":
            newstatus = "onfail"
        return newstatus

    def map_ha_cluster_pacemaker_nodes_status(sample):
        labels = sample.labels
        labels["status"] = nodestatus_from_rhel(sample.name)
        labels["node"] = labels["instname"]
        labels["type"] = "member"
        newsample = Sample("ha_cluster_pacemaker_nodes", labels, sample.value, sample.timestamp)
        return newsample

    def map_ha_cluster_pacemaker_resources(sample):
        labels = sample.labels
        labels["status"] = sample.name[len("ha_cluster_pacemaker_resources_status_"):]
        # look for node name after colon
        parts = labels["instname"].split(':', 2)
        if len(parts) == 2:
            labels["resource"] = parts[0]
            labels["node"] = parts[1]
        else:
            labels["resource"] = labels["instname"]
            labels["node"] = labels["hostname"]
        newsample = Sample("ha_cluster_pacemaker_resources", labels, sample.value, sample.timestamp)
        return newsample

    def map_ha_cluster_pacemaker_resources_managed(sample):
        labels = sample.labels
        labels["managed"] = "True"
        parts = labels["instname"].split(':', 2)
        if len(parts) == 2:
            labels["resource"] = parts[0]
            labels["node"] = parts[1]
        else:
            labels["resource"] = labels["instname"]
            labels["node"] = labels["hostname"]
        newsample = Sample("ha_cluster_pacemaker_resources", labels, sample.value, sample.timestamp)
        return newsample

    def map_ha_cluster_pacemaker_fail_migration(sample):
        labels = sample.labels
        parts = labels["instname"].split(':', 2)
        if len(parts) == 2:
            labels["resource"] = parts[1]
            labels["node"] = parts[0]
        else:
            labels["resource"] = labels["instname"]
            labels["node"] = labels["hostname"]
        newsample = Sample(sample.name, labels, sample.value, sample.timestamp)
        return newsample

    test_dict = {
        "ha_cluster_pacemaker_nodes_status_dc": map_ha_cluster_pacemaker_nodes_status,
        "ha_cluster_pacemaker_nodes_status_online": map_ha_cluster_pacemaker_nodes_status,
        "ha_cluster_pacemaker_nodes_status_standby": map_ha_cluster_pacemaker_nodes_status,
        "ha_cluster_pacemaker_nodes_status_standby_on_fail": map_ha_cluster_pacemaker_nodes_status,
        "ha_cluster_pacemaker_nodes_status_maintenance": map_ha_cluster_pacemaker_nodes_status,
        "ha_cluster_pacemaker_nodes_status_pending": map_ha_cluster_pacemaker_nodes_status,
        "ha_cluster_pacemaker_nodes_status_shutdown": map_ha_cluster_pacemaker_nodes_status,
        "ha_cluster_pacemaker_nodes_status_expected_up": map_ha_cluster_pacemaker_nodes_status,
        "ha_cluster_pacemaker_nodes_status_unclean": map_ha_cluster_pacemaker_nodes_status,
        "ha_cluster_pacemaker_resources_managed": map_ha_cluster_pacemaker_resources_managed,
        "ha_cluster_pacemaker_resources_status_active": map_ha_cluster_pacemaker_resources,
        "ha_cluster_pacemaker_resources_status_blocked": map_ha_cluster_pacemaker_resources,
        "ha_cluster_pacemaker_resources_status_failed": map_ha_cluster_pacemaker_resources,
        "ha_cluster_pacemaker_resources_status_failure_ignored": map_ha_cluster_pacemaker_resources,
        "ha_cluster_pacemaker_resources_status_orphaned": map_ha_cluster_pacemaker_resources,
        "ha_cluster_pacemaker_fail_count": map_ha_cluster_pacemaker_fail_migration,
        "ha_cluster_pacemaker_migration_threshold": map_ha_cluster_pacemaker_fail_migration
    }

    def rhel_to_suse_metric(samples):
        new_samples = []
        for s in samples:
            mapfunc = test_dict.get(s.name)
            if mapfunc is not None:
                newsample = mapfunc(s)
            else:
                newsample = s
            new_samples.append(newsample)
        return new_samples

    prometheusMetricsText = self.lastResult[0]
    includeRegex = self.lastResult[1]
    suppressIfZeroRegex = self.lastResult[2]
    resultSet = list()

    def isDCnodedata(filteredsamples):
        for sample in filteredsamples:
            if sample.name == "ha_cluster_pacemaker_nodes":
                if sample.labels["status"] == "dc":
                    if sample.labels["node"] == self.providerInstance.metadata['hostname']:
                        return True
        return False

    self.tracer.info("[%s] converting result set into JSON" % self.fullName)
    try:
        allfilteredsamples = []
        if not prometheusMetricsText:
            raise ValueError("Empty result from prometheus instance %s" %
                             self.providerInstance.instance)
        for family in filter(filter_prometheus_metric,
                             text_string_to_metric_families(prometheusMetricsText)):
            allfilteredsamples.extend(
                filter(filter_prometheus_sample, rhel_to_suse_metric(family.samples)))
        if isDCnodedata(allfilteredsamples):
            resultSet.extend(map(prometheusSample2Dict, allfilteredsamples))
        else:
            self.tracer.info("non-dc data from [%s]" % self.providerInstance.instance_name)
    except ValueError as e:
        self.tracer.error("[%s] Could not parse prometheus metrics (%s): %s" %
                          (self.fullName, e, prometheusMetricsText))
        resultSet.append(prometheusSample2Dict(Sample("up", dict(), 0)))
    else:
        # The up-metric is used to determine whether valid data could be read from
        # the prometheus endpoint and is used by prometheus in a similar way
        resultSet.append(prometheusSample2Dict(Sample("up", dict(), 1)))
    resultSet.append(prometheusSample2Dict(
        Sample("sapmon", {
            "SAPMON_VERSION": PAYLOAD_VERSION,
            "PROVIDER_INSTANCE": self.providerInstance.name
        }, 1)))

    # Convert temporary dictionary into JSON string
    try:
        # Use a very compact json representation to limit amount of data parsed by LA
        resultJsonString = json.dumps(resultSet, sort_keys=True, separators=(',', ':'),
                                      cls=JsonEncoder)
        self.tracer.debug("[%s] resultJson=%s" % (self.fullName, str(resultJsonString)[:1000]))
    except Exception as e:
        self.tracer.error("[%s] could not format logItem=%s into JSON (%s)" %
                          (self.fullName, resultSet[:50], e))
    return resultJsonString
def map_ha_cluster_pacemaker_location_constraints(sample):
    newsample = Sample("ha_cluster_pacemaker_location_constraints", sample.labels,
                       sample.value, sample.timestamp)
    return newsample
def decumulate_histogram_buckets(sample_data):
    """
    Decumulate buckets in a given histogram metric and add the lower_bound label
    (le being upper_bound)
    """
    # TODO: investigate performance optimizations
    new_sample_data = []
    bucket_values_by_context_upper_bound = {}
    for sample, tags, hostname in sample_data:
        if sample.name.endswith('_bucket'):
            context_key = compute_bucket_hash(sample.labels)
            if context_key not in bucket_values_by_context_upper_bound:
                bucket_values_by_context_upper_bound[context_key] = {}
            bucket_values_by_context_upper_bound[context_key][float(sample.labels['le'])] = sample.value
        new_sample_data.append([sample, tags, hostname])

    sorted_buckets_by_context = {}
    for context in bucket_values_by_context_upper_bound:
        sorted_buckets_by_context[context] = sorted(bucket_values_by_context_upper_bound[context])

    # Tuples (lower_bound, upper_bound, value)
    bucket_tuples_by_context_upper_bound = {}
    for context in sorted_buckets_by_context:
        for i, upper_b in enumerate(sorted_buckets_by_context[context]):
            if i == 0:
                if context not in bucket_tuples_by_context_upper_bound:
                    bucket_tuples_by_context_upper_bound[context] = {}
                if upper_b > 0:
                    # positive buckets start at zero
                    bucket_tuples_by_context_upper_bound[context][upper_b] = (
                        0,
                        upper_b,
                        bucket_values_by_context_upper_bound[context][upper_b],
                    )
                else:
                    # negative buckets start at -inf
                    bucket_tuples_by_context_upper_bound[context][upper_b] = (
                        NEGATIVE_INFINITY,
                        upper_b,
                        bucket_values_by_context_upper_bound[context][upper_b],
                    )
                continue
            tmp = (bucket_values_by_context_upper_bound[context][upper_b]
                   - bucket_values_by_context_upper_bound[context][sorted_buckets_by_context[context][i - 1]])
            bucket_tuples_by_context_upper_bound[context][upper_b] = (
                sorted_buckets_by_context[context][i - 1],
                upper_b,
                tmp,
            )

    # modify original metric to inject lower_bound & modified value
    for sample, tags, hostname in new_sample_data:
        if not sample.name.endswith('_bucket'):
            yield sample, tags, hostname
        else:
            context_key = compute_bucket_hash(sample.labels)
            matching_bucket_tuple = bucket_tuples_by_context_upper_bound[context_key][float(sample.labels['le'])]
            # Prevent 0.0
            lower_bound = str(matching_bucket_tuple[0] or 0)
            sample.labels['lower_bound'] = lower_bound
            tags.append(f'lower_bound:{lower_bound}')
            yield Sample(sample.name, sample.labels, matching_bucket_tuple[2]), tags, hostname
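# Hedged usage sketch: feeding (sample, tags, hostname) triples through the generator
# above. It assumes compute_bucket_hash groups samples by their non-'le' labels and that
# NEGATIVE_INFINITY is defined; the metric name, tags and counts below are invented.
from prometheus_client.samples import Sample

sample_data = [
    (Sample('request_latency_bucket', {'le': '1.0'}, 3.0, None, None), ['env:dev'], 'host-1'),
    (Sample('request_latency_bucket', {'le': '+Inf'}, 10.0, None, None), ['env:dev'], 'host-1'),
    (Sample('request_latency_count', {}, 10.0, None, None), ['env:dev'], 'host-1'),
]
for sample, tags, hostname in decumulate_histogram_buckets(sample_data):
    # bucket samples come back with a lower_bound label and per-bucket (non-cumulative)
    # values, e.g. le=+Inf becomes lower_bound=1.0 with value 7.0; other samples pass through
    print(sample.name, sample.labels.get('lower_bound'), sample.value, tags)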