def __init__(self, metrics, prefix):
    """Register fetch-manager sensors: bytes/records fetched, fetch latency,
    records lag, and fetch throttle time.

    Arguments:
        metrics: the Metrics registry to create sensors and metric names on.
        prefix (str): prefix for the metric group name; the group becomes
            '<prefix>-fetch-manager-metrics'.
    """
    self.metrics = metrics
    self.group_name = '%s-fetch-manager-metrics' % prefix

    self.bytes_fetched = metrics.sensor('bytes-fetched')
    self.bytes_fetched.add(metrics.metric_name('fetch-size-avg', self.group_name,
        'The average number of bytes fetched per request'), Avg())
    self.bytes_fetched.add(metrics.metric_name('fetch-size-max', self.group_name,
        'The maximum number of bytes fetched per request'), Max())
    self.bytes_fetched.add(metrics.metric_name('bytes-consumed-rate', self.group_name,
        'The average number of bytes consumed per second'), Rate())

    # NOTE: was `self.metrics.sensor(...)`; use the local `metrics` like every
    # other sensor in this method (same object, purely a consistency fix).
    self.records_fetched = metrics.sensor('records-fetched')
    self.records_fetched.add(metrics.metric_name('records-per-request-avg', self.group_name,
        'The average number of records in each request'), Avg())
    self.records_fetched.add(metrics.metric_name('records-consumed-rate', self.group_name,
        'The average number of records consumed per second'), Rate())

    self.fetch_latency = metrics.sensor('fetch-latency')
    self.fetch_latency.add(metrics.metric_name('fetch-latency-avg', self.group_name,
        'The average time taken for a fetch request.'), Avg())
    self.fetch_latency.add(metrics.metric_name('fetch-latency-max', self.group_name,
        'The max time taken for any fetch request.'), Max())
    self.fetch_latency.add(metrics.metric_name('fetch-rate', self.group_name,
        'The number of fetch requests per second.'), Rate(sampled_stat=Count()))

    self.records_fetch_lag = metrics.sensor('records-lag')
    # Fixed description: "in self window" was a Java->Python port typo for
    # "in this window".
    self.records_fetch_lag.add(metrics.metric_name('records-lag-max', self.group_name,
        'The maximum lag in terms of number of records for any partition in this window'), Max())

    self.fetch_throttle_time_sensor = metrics.sensor('fetch-throttle-time')
    self.fetch_throttle_time_sensor.add(metrics.metric_name('fetch-throttle-time-avg', self.group_name,
        'The average throttle time in ms'), Avg())
    self.fetch_throttle_time_sensor.add(metrics.metric_name('fetch-throttle-time-max', self.group_name,
        'The maximum throttle time in ms'), Max())
def __init__(self, heartbeat, metrics, prefix, tags=None):
    """Register coordinator heartbeat/join/sync sensors and the
    last-heartbeat-seconds-ago gauge.

    Arguments:
        heartbeat: heartbeat state object; `last_send` is read by the gauge.
        metrics: the Metrics registry to register sensors/metrics with.
        prefix (str): group-name prefix ('<prefix>-coordinator-metrics').
        tags (dict, optional): extra tags attached to every metric name.
    """
    self.heartbeat = heartbeat
    self.metrics = metrics
    self.metric_group_name = prefix + "-coordinator-metrics"

    # All metric names in this method share the same group and tags.
    def _name(name, description):
        return metrics.metric_name(name, self.metric_group_name, description, tags)

    self.heartbeat_latency = metrics.sensor('heartbeat-latency')
    self.heartbeat_latency.add(
        _name('heartbeat-response-time-max',
              'The max time taken to receive a response to a heartbeat request'),
        Max())
    self.heartbeat_latency.add(
        _name('heartbeat-rate',
              'The average number of heartbeats per second'),
        Rate(sampled_stat=Count()))

    self.join_latency = metrics.sensor('join-latency')
    self.join_latency.add(
        _name('join-time-avg', 'The average time taken for a group rejoin'),
        Avg())
    self.join_latency.add(
        _name('join-time-max', 'The max time taken for a group rejoin'),
        Max())
    self.join_latency.add(
        _name('join-rate', 'The number of group joins per second'),
        Rate(sampled_stat=Count()))

    self.sync_latency = metrics.sensor('sync-latency')
    self.sync_latency.add(
        _name('sync-time-avg', 'The average time taken for a group sync'),
        Avg())
    self.sync_latency.add(
        _name('sync-time-max', 'The max time taken for a group sync'),
        Max())
    self.sync_latency.add(
        _name('sync-rate', 'The number of group syncs per second'),
        Rate(sampled_stat=Count()))

    # Gauge: seconds elapsed since the last heartbeat was sent
    # (`now` is in ms, `heartbeat.last_send` in seconds).
    metrics.add_metric(
        _name('last-heartbeat-seconds-ago',
              'The number of seconds since the last controller heartbeat was sent'),
        AnonMeasurable(lambda _, now: (now / 1000) - self.heartbeat.last_send))
def __init__(self, metrics, metric_group_prefix, subscription):
    """Register commit-latency sensors and an assigned-partitions gauge.

    Arguments:
        metrics: the Metrics registry to register sensors/metrics with.
        metric_group_prefix (str): group-name prefix
            ('<prefix>-coordinator-metrics').
        subscription: subscription state; `assigned_partitions()` is read
            by the gauge.
    """
    self.metrics = metrics
    self.metric_group_name = '%s-coordinator-metrics' % (metric_group_prefix,)

    self.commit_latency = metrics.sensor('commit-latency')
    for metric, description, stat in (
            ('commit-latency-avg',
             'The average time taken for a commit request', Avg()),
            ('commit-latency-max',
             'The max time taken for a commit request', Max()),
            ('commit-rate',
             'The number of commit calls per second',
             Rate(sampled_stat=Count()))):
        self.commit_latency.add(
            metrics.metric_name(metric, self.metric_group_name, description),
            stat)

    # Gauge over the live subscription state (re-evaluated on each read).
    metrics.add_metric(
        metrics.metric_name(
            'assigned-partitions', self.metric_group_name,
            'The number of partitions currently assigned to this consumer'),
        AnonMeasurable(
            lambda config, now: len(subscription.assigned_partitions())))
def test_simple_stats(mocker, time_keeper, config, metrics):
    """End-to-end check of the basic stats (avg/max/min/rate/count/total)
    recorded through a sensor, before and after simulated time passing."""
    mocker.patch('time.time', side_effect=time_keeper.time)

    measurable = ConstantMeasurable()
    metrics.add_metric(
        metrics.metric_name(
            'direct.measurable', 'grp1',
            'The fraction of time an appender waits for space allocation.'),
        measurable)

    sensor = metrics.sensor('test.sensor')
    sensor.add(metrics.metric_name('test.avg', 'grp1'), Avg())
    sensor.add(metrics.metric_name('test.max', 'grp1'), Max())
    sensor.add(metrics.metric_name('test.min', 'grp1'), Min())
    sensor.add(metrics.metric_name('test.rate', 'grp1'), Rate(TimeUnit.SECONDS))
    sensor.add(metrics.metric_name('test.occurences', 'grp1'),
               Rate(TimeUnit.SECONDS, Count()))
    sensor.add(metrics.metric_name('test.count', 'grp1'), Count())
    sensor.add_compound(Percentiles(
        100, BucketSizing.CONSTANT, 100, -100,
        percentiles=[
            Percentile(metrics.metric_name('test.median', 'grp1'), 50.0),
            Percentile(metrics.metric_name('test.perc99_9', 'grp1'), 99.9),
        ]))

    sensor2 = metrics.sensor('test.sensor2')
    sensor2.add(metrics.metric_name('s2.total', 'grp1'), Total())
    sensor2.record(5.0)

    total = 0
    count = 10
    for value in range(count):
        sensor.record(value)
        total += value

    # convenience accessor for a grp1 metric's current value
    def metric_value(name):
        return metrics.metrics.get(metrics.metric_name(name, 'grp1')).value()

    # prior to any time passing
    elapsed_secs = (config.time_window_ms * (config.samples - 1)) / 1000.0
    assert abs(count / elapsed_secs - metric_value('test.occurences')) < EPS, \
        'Occurrences(0...%d) = %f' % (count, count / elapsed_secs)

    # pretend 2 seconds passed...
    sleep_time_seconds = 2.0
    time_keeper.sleep(sleep_time_seconds)
    elapsed_secs += sleep_time_seconds

    assert abs(5.0 - metric_value('s2.total')) < EPS, \
        's2 reflects the constant value'
    assert abs(4.5 - metric_value('test.avg')) < EPS, 'Avg(0...9) = 4.5'
    assert abs((count - 1) - metric_value('test.max')) < EPS, 'Max(0...9) = 9'
    assert abs(0.0 - metric_value('test.min')) < EPS, 'Min(0...9) = 0'
    assert abs((total / elapsed_secs) - metric_value('test.rate')) < EPS, \
        'Rate(0...9) = 1.40625'
    assert abs((count / elapsed_secs) - metric_value('test.occurences')) < EPS, \
        'Occurrences(0...%d) = %f' % (count, count / elapsed_secs)
    assert abs(count - metric_value('test.count')) < EPS, 'Count(0...9) = 10'
def test_old_data_has_no_effect(mocker, time_keeper):
    """Samples older than the retained window must not affect measurements:
    each stat falls back to its identity value once every sample expires."""
    mocker.patch('time.time', side_effect=time_keeper.time)

    window_ms = 100
    samples = 2
    config = MetricConfig(time_window_ms=window_ms, samples=samples)

    max_stat = Max()
    min_stat = Min()
    avg_stat = Avg()
    count_stat = Count()
    for stat in (max_stat, min_stat, avg_stat, count_stat):
        stat.record(config, 50, time_keeper.ms())

    # advance past every sample window so all recorded data is purged
    time_keeper.sleep(samples * window_ms / 1000.0)

    assert max_stat.measure(config, time_keeper.ms()) == float('-inf')
    assert min_stat.measure(config, time_keeper.ms()) == float(sys.maxsize)
    assert avg_stat.measure(config, time_keeper.ms()) == 0.0
    assert count_stat.measure(config, time_keeper.ms()) == 0
def record_topic_fetch_metrics(self, topic, num_bytes, num_records):
    """Record per-topic bytes/records fetched, lazily creating each topic's
    sensors (with their metrics) on first use.

    Arguments:
        topic (str): topic name ('.' is replaced with '_' in the metric tag).
        num_bytes (int): bytes fetched for this topic in the response.
        num_records (int): records fetched for this topic in the response.
    """
    metric_tags = {'topic': topic.replace('.', '_')}

    # record bytes fetched
    name = '.'.join(['topic', topic, 'bytes-fetched'])
    bytes_fetched = self.metrics.get_sensor(name)
    if not bytes_fetched:
        bytes_fetched = self.metrics.sensor(name)
        for metric, description, stat in (
                ('fetch-size-avg',
                 'The average number of bytes fetched per request for topic %s' % (topic,),
                 Avg()),
                ('fetch-size-max',
                 'The maximum number of bytes fetched per request for topic %s' % (topic,),
                 Max()),
                ('bytes-consumed-rate',
                 'The average number of bytes consumed per second for topic %s' % (topic,),
                 Rate())):
            bytes_fetched.add(
                self.metrics.metric_name(metric, self.group_name,
                                         description, metric_tags),
                stat)
    bytes_fetched.record(num_bytes)

    # record records fetched
    name = '.'.join(['topic', topic, 'records-fetched'])
    records_fetched = self.metrics.get_sensor(name)
    if not records_fetched:
        records_fetched = self.metrics.sensor(name)
        for metric, description, stat in (
                ('records-per-request-avg',
                 'The average number of records in each request for topic %s' % (topic,),
                 Avg()),
                ('records-consumed-rate',
                 'The average number of records consumed per second for topic %s' % (topic,),
                 Rate())):
            records_fetched.add(
                self.metrics.metric_name(metric, self.group_name,
                                         description, metric_tags),
                stat)
    records_fetched.record(num_records)
def __init__(self, metrics, metric_group_prefix, node_id): self.metrics = metrics # Any broker may have registered summary metrics already # but if not, we need to create them so we can set as parents below all_conns_transferred = metrics.get_sensor('bytes-sent-received') if not all_conns_transferred: metric_group_name = metric_group_prefix + '-metrics' bytes_transferred = metrics.sensor('bytes-sent-received') bytes_transferred.add(metrics.metric_name( 'network-io-rate', metric_group_name, 'The average number of network operations (reads or writes) on all' ' connections per second.'), Rate(sampled_stat=Count())) bytes_sent = metrics.sensor('bytes-sent', parents=[bytes_transferred]) bytes_sent.add(metrics.metric_name( 'outgoing-byte-rate', metric_group_name, 'The average number of outgoing bytes sent per second to all' ' servers.'), Rate()) bytes_sent.add(metrics.metric_name( 'request-rate', metric_group_name, 'The average number of requests sent per second.'), Rate(sampled_stat=Count())) bytes_sent.add(metrics.metric_name( 'request-size-avg', metric_group_name, 'The average size of all requests in the window.'), Avg()) bytes_sent.add(metrics.metric_name( 'request-size-max', metric_group_name, 'The maximum size of any request sent in the window.'), Max()) bytes_received = metrics.sensor('bytes-received', parents=[bytes_transferred]) bytes_received.add(metrics.metric_name( 'incoming-byte-rate', metric_group_name, 'Bytes/second read off all sockets'), Rate()) bytes_received.add(metrics.metric_name( 'response-rate', metric_group_name, 'Responses received sent per second.'), Rate(sampled_stat=Count())) request_latency = metrics.sensor('request-latency') request_latency.add(metrics.metric_name( 'request-latency-avg', metric_group_name, 'The average request latency in ms.'), Avg()) request_latency.add(metrics.metric_name( 'request-latency-max', metric_group_name, 'The maximum request latency in ms.'), Max()) # if one sensor of the metrics has been registered for the 
connection, # then all other sensors should have been registered; and vice versa node_str = 'node-{0}'.format(node_id) node_sensor = metrics.get_sensor(node_str + '.bytes-sent') if not node_sensor: metric_group_name = metric_group_prefix + '-node-metrics.' + node_str bytes_sent = metrics.sensor( node_str + '.bytes-sent', parents=[metrics.get_sensor('bytes-sent')]) bytes_sent.add(metrics.metric_name( 'outgoing-byte-rate', metric_group_name, 'The average number of outgoing bytes sent per second.'), Rate()) bytes_sent.add(metrics.metric_name( 'request-rate', metric_group_name, 'The average number of requests sent per second.'), Rate(sampled_stat=Count())) bytes_sent.add(metrics.metric_name( 'request-size-avg', metric_group_name, 'The average size of all requests in the window.'), Avg()) bytes_sent.add(metrics.metric_name( 'request-size-max', metric_group_name, 'The maximum size of any request sent in the window.'), Max()) bytes_received = metrics.sensor( node_str + '.bytes-received', parents=[metrics.get_sensor('bytes-received')]) bytes_received.add(metrics.metric_name( 'incoming-byte-rate', metric_group_name, 'Bytes/second read off node-connection socket'), Rate()) bytes_received.add(metrics.metric_name( 'response-rate', metric_group_name, 'The average number of responses received per second.'), Rate(sampled_stat=Count())) request_time = metrics.sensor( node_str + '.latency', parents=[metrics.get_sensor('request-latency')]) request_time.add(metrics.metric_name( 'request-latency-avg', metric_group_name, 'The average request latency in ms.'), Avg()) request_time.add(metrics.metric_name( 'request-latency-max', metric_group_name, 'The maximum request latency in ms.'), Max()) self.bytes_sent = metrics.sensor(node_str + '.bytes-sent') self.bytes_received = metrics.sensor(node_str + '.bytes-received') self.request_time = metrics.sensor(node_str + '.latency')
def __init__(self, metrics, client, metadata):
    """Register producer/sender sensors and gauges.

    Arguments:
        metrics: the Metrics registry; sensors are created on it and
            metrics attached via the (externally defined) `self.add_metric`.
        client: network client; `in_flight_request_count()` is read by the
            requests-in-flight gauge.
        metadata: cluster metadata; `_last_successful_refresh_ms` is read by
            the metadata-age gauge.
    """
    self.metrics = metrics
    self._client = client
    self._metadata = metadata

    # NOTE: each sensor must be created before add_metric() references it
    # by `sensor_name`, so the creation/add order below matters.
    sensor_name = 'batch-size'
    self.batch_size_sensor = self.metrics.sensor(sensor_name)
    self.add_metric('batch-size-avg', Avg(),
                    sensor_name=sensor_name,
                    description='The average number of bytes sent per partition per-request.')
    self.add_metric('batch-size-max', Max(),
                    sensor_name=sensor_name,
                    description='The max number of bytes sent per partition per-request.')

    sensor_name = 'compression-rate'
    self.compression_rate_sensor = self.metrics.sensor(sensor_name)
    self.add_metric('compression-rate-avg', Avg(),
                    sensor_name=sensor_name,
                    description='The average compression rate of record batches.')

    sensor_name = 'queue-time'
    self.queue_time_sensor = self.metrics.sensor(sensor_name)
    self.add_metric('record-queue-time-avg', Avg(),
                    sensor_name=sensor_name,
                    description='The average time in ms record batches spent in the record accumulator.')
    self.add_metric('record-queue-time-max', Max(),
                    sensor_name=sensor_name,
                    description='The maximum time in ms record batches spent in the record accumulator.')

    sensor_name = 'produce-throttle-time'
    self.produce_throttle_time_sensor = self.metrics.sensor(sensor_name)
    self.add_metric('produce-throttle-time-avg', Avg(),
                    sensor_name=sensor_name,
                    description='The average throttle time in ms')
    self.add_metric('produce-throttle-time-max', Max(),
                    sensor_name=sensor_name,
                    description='The maximum throttle time in ms')

    sensor_name = 'records-per-request'
    self.records_per_request_sensor = self.metrics.sensor(sensor_name)
    self.add_metric('record-send-rate', Rate(),
                    sensor_name=sensor_name,
                    description='The average number of records sent per second.')
    self.add_metric('records-per-request-avg', Avg(),
                    sensor_name=sensor_name,
                    description='The average number of records per request.')

    sensor_name = 'bytes'
    self.byte_rate_sensor = self.metrics.sensor(sensor_name)
    self.add_metric('byte-rate', Rate(),
                    sensor_name=sensor_name,
                    description='The average number of bytes sent per second.')

    sensor_name = 'record-retries'
    self.retry_sensor = self.metrics.sensor(sensor_name)
    self.add_metric('record-retry-rate', Rate(),
                    sensor_name=sensor_name,
                    description='The average per-second number of retried record sends')

    sensor_name = 'errors'
    self.error_sensor = self.metrics.sensor(sensor_name)
    self.add_metric('record-error-rate', Rate(),
                    sensor_name=sensor_name,
                    description='The average per-second number of record sends that resulted in errors')

    sensor_name = 'record-size-max'
    self.max_record_size_sensor = self.metrics.sensor(sensor_name)
    self.add_metric('record-size-max', Max(),
                    sensor_name=sensor_name,
                    description='The maximum record size across all batches')
    self.add_metric('record-size-avg', Avg(),
                    sensor_name=sensor_name,
                    description='The average maximum record size per batch')

    # Gauges (no sensor): evaluated on read via AnonMeasurable.
    self.add_metric('requests-in-flight',
                    AnonMeasurable(lambda *_: self._client.in_flight_request_count()),
                    description='The current number of in-flight requests awaiting a response.')
    # `now` is in ms; the refresh timestamp is in ms, so /1000 yields seconds.
    self.add_metric('metadata-age',
                    AnonMeasurable(lambda _, now: (now - self._metadata._last_successful_refresh_ms) / 1000),
                    description='The age in seconds of the current producer metadata being used.')