def report_cluster(self, config, cluster):
    """Emit gauges describing overall Storm cluster state.

    ``nimbusUptime`` may be absent from the payload, so it is guarded;
    the remaining counters are indexed directly (missing keys raise,
    matching the original contract).
    """
    nimbus_uptime = cluster.get('nimbusUptime', None)
    if nimbus_uptime is not None:
        self.gauge(self.metric(config, 'cluster.nimbus_uptime_seconds'),
                   storm_utils.translate_timespec(nimbus_uptime),
                   tags=config.tags)
    # (metric name, cluster-summary key) pairs, emitted in order.
    counter_specs = (
        ('cluster.slots_used_count', 'slotsUsed'),
        ('cluster.supervisor_count', 'supervisors'),
        ('cluster.slots_total_count', 'slotsTotal'),
        ('cluster.slots_free_count', 'slotsFree'),
        ('cluster.executors_total_count', 'executorsTotal'),
        ('cluster.tasks_total_count', 'tasksTotal'),
    )
    for metric_name, key in counter_specs:
        self.gauge(self.metric(config, metric_name),
                   cluster[key], tags=config.tags)
def test_timespec(self):
    """Unit tests for storm_utils.translate_timespec.

    Covers multi-unit specs, the empty string, and rejection of
    malformed or unknown unit suffixes.
    """
    self.assertEqual(330, storm_utils.translate_timespec('5m 30s'))
    self.assertEqual(1728331, storm_utils.translate_timespec('20d 5m 31s'))
    self.assertEqual(12114041, storm_utils.translate_timespec('20w 5h 41s'))
    self.assertEqual(0, storm_utils.translate_timespec(''))
    # A malformed suffix must be rejected.  (The original bound the
    # context manager to an unused `context` variable; dropped.)
    with self.assertRaises(ValueError):
        storm_utils.translate_timespec('20--')
    # An unknown unit ('Y') must be rejected.
    with self.assertRaises(ValueError):
        storm_utils.translate_timespec('20Y 5m 3s')
def report_supervisors(self, config, resp):
    """Report a total supervisor count plus per-supervisor gauges.

    Each supervisor's gauges are tagged with its host (first) followed
    by the configured base tags, preserving the original tag order.
    """
    supervisors = resp.get('supervisors', [])
    self.gauge(self.metric(config, 'supervisors_total'),
               len(supervisors), tags=config.tags)
    for supervisor in supervisors:
        host_tags = ['storm_host:' + supervisor.get('host')] + config.tags
        self.gauge(self.metric(config, 'supervisor.slots_total'),
                   supervisor.get('slotsTotal'), tags=host_tags)
        self.gauge(self.metric(config, 'supervisor.slots_used_total'),
                   supervisor.get('slotsUsed'), tags=host_tags)
        self.gauge(self.metric(config, 'supervisor.uptime_seconds'),
                   storm_utils.translate_timespec(supervisor.get('uptime')),
                   tags=host_tags)
def report_topologies(self, config, topologies):
    """Report metadata about topologies that match the topologies regex.

    :param config: check configuration carrying base tags.
    :param topologies: mapping of pretty topology name -> topology
        summary dict (expects 'tasksTotal', 'workersTotal',
        'executorsTotal'; 'uptime' defaults to '0s' when absent).
    """
    # dict.items() is equivalent to iteritems() on Python 2 and is the
    # only option on Python 3; iteritems() was Py2-only.
    for pretty_name, topo in topologies.items():
        uptime = topo.get('uptime', '0s')
        tags = config.tags + [
            'storm_topology:' + pretty_name,
        ]
        self.gauge(self.metric(config, 'topologies.uptime_seconds'),
                   storm_utils.translate_timespec(uptime), tags=tags)
        self.gauge(self.metric(config, 'topologies.tasks_total'),
                   topo['tasksTotal'], tags=tags)
        self.gauge(self.metric(config, 'topologies.workers_total'),
                   topo['workersTotal'], tags=tags)
        self.gauge(self.metric(config, 'topologies.executors_total'),
                   topo['executorsTotal'], tags=tags)
def report_executor_details(self, config, details):
    """Report statistics for a single topology's task ID's executors.

    :param config: check configuration carrying base tags.
    :param details: component-detail payload from the Storm REST API
        (expects 'id', 'componentType', 'executors', 'executorStats').
    """
    topology_name = self._topology_name(config, details)
    name = details['id']
    task_type = details['componentType']
    tags = config.tags + self.task_id_tags(config, task_type, name) + [
        'storm_topology:' + topology_name,
        'storm_component_type:' + task_type,
        'storm_task_id:' + details['id'],
    ]
    self.gauge(self.metric(config, 'executor.executors_total'),
               details['executors'], tags=tags)
    # BUG FIX: tasks_total previously re-reported details['executors']
    # (copy-paste).  The component-detail payload carries a 'tasks'
    # count; fall back to the old value so behavior is unchanged if
    # 'tasks' is absent.  TODO(review): confirm field name against the
    # Storm REST API version in use.
    self.gauge(self.metric(config, 'executor.tasks_total'),
               details.get('tasks', details['executors']), tags=tags)
    # Embarrassingly, executorStats are undocumented in the REST
    # API docs (so we might not be allowed to rely on them). But
    # they're the only way to get some SERIOUSLY useful metrics -
    # per-host metrics, in particular.
    for executor in details['executorStats']:
        executor_tags = tags + [
            'executor_id:' + executor['id'],
            'storm_host:' + executor['host'],
            'storm_port:' + str(executor['port']),
        ]
        self.monotonic_count(self.metric(config, 'executor.emitted_total'),
                             executor.get('emitted', 0), tags=executor_tags)
        self.monotonic_count(self.metric(config, 'executor.transferred_total'),
                             executor.get('transferred', 0), tags=executor_tags)
        self.monotonic_count(self.metric(config, 'executor.acked_total'),
                             executor.get('acked', 0), tags=executor_tags)
        self.monotonic_count(self.metric(config, 'executor.executed_total'),
                             executor.get('executed', 0), tags=executor_tags)
        self.monotonic_count(self.metric(config, 'executor.failed_total'),
                             executor.get('failed', 0), tags=executor_tags)
        # Latencies arrive as strings; capacity is a 0..1 ratio scaled
        # to percent.
        self.gauge(self.metric(config, 'executor.execute_latency_us'),
                   float(executor.get('executeLatency', 0)),
                   tags=executor_tags)
        self.gauge(self.metric(config, 'executor.process_latency_us'),
                   float(executor.get('processLatency', 0)),
                   tags=executor_tags)
        self.gauge(self.metric(config, 'executor.capacity_percent'),
                   float(executor.get('capacity', 0)) * 100,
                   tags=executor_tags)
        self.gauge(self.metric(config, 'executor.uptime_seconds'),
                   storm_utils.translate_timespec(executor.get('uptime', '0s')),
                   tags=executor_tags)
def report_cluster(self, config, cluster):
    """Emit gauges describing overall Storm cluster state.

    :param config: check configuration carrying base tags.
    :param cluster: cluster-summary payload from the Storm REST API.
    """
    # ROBUSTNESS/CONSISTENCY: the sibling report_cluster implementation
    # in this file treats 'nimbusUptime' as optional; indexing it
    # directly raised KeyError when the summary omitted it.  Guard the
    # same way here.
    uptime = cluster.get('nimbusUptime', None)
    if uptime is not None:
        self.gauge(self.metric(config, 'cluster.nimbus_uptime_seconds'),
                   storm_utils.translate_timespec(uptime), tags=config.tags)
    self.gauge(self.metric(config, 'cluster.slots_used_count'),
               cluster['slotsUsed'], tags=config.tags)
    self.gauge(self.metric(config, 'cluster.supervisor_count'),
               cluster['supervisors'], tags=config.tags)
    self.gauge(self.metric(config, 'cluster.slots_total_count'),
               cluster['slotsTotal'], tags=config.tags)
    self.gauge(self.metric(config, 'cluster.slots_free_count'),
               cluster['slotsFree'], tags=config.tags)
    self.gauge(self.metric(config, 'cluster.executors_total_count'),
               cluster['executorsTotal'], tags=config.tags)
    self.gauge(self.metric(config, 'cluster.tasks_total_count'),
               cluster['tasksTotal'], tags=config.tags)