def test_removing_timer_removes_timer():
    """After without_timer(), the named timer is gone from the metric."""
    timer_name = 'test_timer'
    metric = FluentMetric()
    metric.with_timer(timer_name)
    time.sleep(1)

    running = metric.get_timer(timer_name)
    # Timer was started in the past and has accumulated roughly one second.
    assert running.start < arrow.utcnow()
    assert running.elapsed_in_ms() > 1000
    assert running.elapsed_in_ms() < 2000

    metric.without_timer(timer_name)
    assert not metric.get_timer(timer_name)
def test_can_add_multiple_timers():
    """Two timers on one FluentMetric run independently and keep counting."""
    first, second = 'test_timer_1', 'test_timer_2'
    metric = FluentMetric()

    metric.with_timer(first)
    time.sleep(1)
    timer_one = metric.get_timer(first)
    assert timer_one.start < arrow.utcnow()
    assert timer_one.elapsed_in_ms() > 1000
    assert timer_one.elapsed_in_ms() < 2000

    metric.with_timer(second)
    time.sleep(1)
    timer_two = metric.get_timer(second)
    assert timer_two.start < arrow.utcnow()
    assert timer_two.elapsed_in_ms() > 1000
    assert timer_two.elapsed_in_ms() < 2000

    # The first timer kept running through the second sleep.
    assert timer_one.elapsed_in_ms() > 2000
class APIMonitor:
    """Collects API request counters, latencies and ad-hoc metrics, pushing
    them to either Prometheus or CloudWatch.

    Configuration is entirely environment-driven:
      - api_monitor=1 enables monitoring.
      - api_monitor_prometheus=1 selects the Prometheus backend (default: CloudWatch).
      - db_name (underscores become dashes) names the monitored target.
      - api_monitor_cw_push_period sets the CloudWatch flush period in seconds.
    """

    def __init__(self):
        self.enabled = ('api_monitor' in os.environ
                        and int(os.environ['api_monitor']) == 1)
        self.use_prometheus = ('api_monitor_prometheus' in os.environ
                               and int(os.environ['api_monitor_prometheus']) == 1)

        # Always initialize these, regardless of configuration.  Previously
        # _push_handle was only assigned inside start_monitor() on the
        # CloudWatch path, so stop_monitor() raised AttributeError whenever
        # monitoring was disabled or Prometheus-backed; the stat dicts were
        # likewise only created when enabled.
        self._push_handle = None
        # Latencies and request counters.
        self._counters = {}
        self._latencies = {}
        # General-purpose metrics and their registered unit/dimension specs.
        self._metrics = {}
        self._metric_specs = {}

        if self.enabled:
            # BUG FIX: default used to be 'trews;' with a stray trailing
            # semicolon — presumably a typo for 'trews' (verify nothing
            # matched on the literal 'trews;').
            self.monitor_target = os.environ['db_name'].replace("_", "-") \
                if 'db_name' in os.environ else 'trews'
            if self.use_prometheus:
                self.prometheus = PrometheusMonitor()
            else:
                self._push_period_secs = int(os.environ['api_monitor_cw_push_period']) \
                    if 'api_monitor_cw_push_period' in os.environ else 60
                # k8s pods have their pod name set as the hostname.
                stream_id = os.environ.get('HOSTNAME', 'api-testing')
                self.cw_metrics = FluentMetric().with_namespace(
                    'OpsDX').with_stream_id(stream_id)

    # Asynchronous stats uploads.
    async def start_monitor(self, app):
        """Schedule the periodic CloudWatch flush (no-op for Prometheus/disabled)."""
        if self.enabled and not self.use_prometheus:
            loop = asyncio.get_event_loop()
            self._push_handle = loop.call_later(self._push_period_secs,
                                                self._cw_flush, loop)
        return True

    async def stop_monitor(self, app):
        """Cancel the pending flush, if one was ever scheduled."""
        if self._push_handle:
            self._push_handle.cancel()
        return True

    # Returns a context manager for timing in a 'with' block.
    def time(self, name):
        if self.enabled:
            if self.use_prometheus:
                return self.prometheus.trews_api_request_latency.labels(
                    self.monitor_target, name).time()
            else:
                return _CloudwatchTimer(self, name)
        else:
            return _NullContextManager()

    # Track a request served/processed. Internally increments a counter.
    def request(self, name, value=1):
        if self.enabled:
            if self.use_prometheus:
                # NOTE: the Prometheus counter is incremented by 1, not by
                # `value` — preserved from the original implementation.
                self.prometheus.trews_api_request_counts.labels(
                    self.monitor_target, name).inc()
            else:
                self._request(name, value)

    # Helpers.
    def _latency(self, name):
        """Record the elapsed time of the named CloudWatch timer."""
        if self.enabled:
            timer = self.cw_metrics.get_timer(name)
            duration_ms = timer.elapsed_in_ms()
            self._latencies.setdefault(name, []).append(duration_ms)

    def _request(self, name, value):
        """Accumulate a request count for `name`."""
        if self.enabled:
            self._counters[name] = self._counters.get(name, 0) + value

    # Metrics.
    # TODO: Prometheus implementation
    def register_metric(self, metric_name, unit, dimensions):
        """Declare the CloudWatch unit and dimensions used when flushing `metric_name`."""
        if self.enabled and not self.use_prometheus:
            self._metric_specs[metric_name] = {
                'unit': unit,
                'dimensions': dimensions
            }

    def add_metric(self, name, value=1):
        """Accumulate a scalar metric (summed across calls until the next flush)."""
        if self.enabled and not self.use_prometheus:
            if name in self._metrics:
                self._metrics[name] += value
            else:
                self._metrics[name] = value

    def append_metric(self, name, value=1):
        """Accumulate a list-valued metric (flushed as count/sum/avg)."""
        if self.enabled and not self.use_prometheus:
            if name in self._metrics:
                self._metrics[name].append(value)
            else:
                self._metrics[name] = [value]

    # Metrics upload.
    def _cw_flush(self, loop):
        """Push all accumulated stats to CloudWatch, reset them, and reschedule."""
        if self.enabled:
            try:
                logging.info('Flushing CW metrics... %s %s'
                             % (len(self._latencies), len(self._counters)))
                self.cw_metrics.with_dimension('API', self.monitor_target)

                for k, v in self._counters.items():
                    logging.info('Requests %s %s' % (k, str(v)))
                    self.cw_metrics.with_dimension('Route', k)
                    self.cw_metrics.count(MetricName='Requests', Count=v)

                for k, v in self._latencies.items():
                    self.cw_metrics.with_dimension('Route', k)
                    l_cnt = float(len(v))
                    # sum() handles the empty list; functools.reduce raised
                    # TypeError on [] even though l_avg guarded against it.
                    l_sum = float(sum(v))
                    l_avg = l_sum / l_cnt if l_cnt > 0 else 0.0
                    logging.info('Latency %s %s %s %s' % (k, l_cnt, l_sum, l_avg))
                    self.cw_metrics.count(MetricName='LatencyCount', Count=l_cnt) \
                        .log(MetricName='LatencySum', Value=l_sum, Unit='Milliseconds') \
                        .log(MetricName='LatencyAvg', Value=l_avg, Unit='Milliseconds')

                self.cw_metrics.without_dimension('Route')
                self.cw_metrics.without_dimension('API')

                for k, v in self._metrics.items():
                    unit = self._metric_specs.get(k, {}).get('unit', 'None')
                    dimensions = self._metric_specs.get(k, {}).get('dimensions', [])
                    self.cw_metrics.push_dimensions()
                    for dn, dv in dimensions:
                        self.cw_metrics.with_dimension(dn, dv)
                    if isinstance(v, Number):
                        # Scalar metric: log a single value.
                        logging.info('NMetric %s %s' % (k, v))
                        self.cw_metrics.log(MetricName=k, Value=v, Unit=unit)
                    elif isinstance(v, list):
                        # List metric: flush as count/sum/avg.
                        v_cnt = float(len(v))
                        v_sum = float(sum(v))
                        v_avg = v_sum / v_cnt if v_cnt > 0 else 0.0
                        logging.info('LMetric %s %s %s %s' % (k, v_cnt, v_sum, v_avg))
                        self.cw_metrics.count(MetricName='%sCount' % k, Count=v_cnt) \
                            .log(MetricName='%sSum' % k, Value=v_sum, Unit=unit) \
                            .log(MetricName='%sAvg' % k, Value=v_avg, Unit=unit)
                    self.cw_metrics.pop_dimensions()

                # Reset accumulators for the next period.
                self._metrics = {}
                self._counters = {}
                self._latencies = {}

                # Schedule the next flush.
                self._push_handle = loop.call_later(self._push_period_secs,
                                                    self._cw_flush, loop)
            except Exception as e:
                logging.error(str(e))
                traceback.print_exc()