def test_metric_gc(ray_instance, start_target_actor):
    """Check that repeated scrapes do not grow the frame with a 0s GC window."""
    # With gc_window_seconds=0, data points from earlier scrapes are expected
    # to be dropped whenever a new scrape is invoked.
    monitor = MetricMonitor.remote(gc_window_seconds=0)
    ray.get(monitor.add_target.remote(start_target_actor))

    ray.get(monitor.scrape.remote())
    first_frame = ray.get(monitor._get_dataframe.remote())
    assert len(first_frame) == 102

    # Old metrics should have been cleared by the second scrape, so only
    # 1 counter value + 101 list values remain.
    ray.get(monitor.scrape.remote())
    second_frame = ray.get(monitor._get_dataframe.remote())
    assert len(second_frame) == 102
def _get_or_start_metric_monitor(self, gc_window_s):
    """Attach to this serve cluster's metric monitor, creating it if absent.

    The monitor is looked up by SERVE_METRIC_MONITOR_NAME. When the lookup
    raises ValueError, a new detached, named MetricMonitor actor is started
    with ``gc_window_s``, its monitoring loop task is launched, and the
    router is added as a target.

    Args:
        gc_window_s: Forwarded as the only argument to the MetricMonitor
            constructor when a new monitor has to be created.
    """
    try:
        self.metric_monitor = ray.util.get_actor(SERVE_METRIC_MONITOR_NAME)
    except ValueError:
        # The named actor lookup failed, so start a fresh monitor.
        monitor_name = SERVE_METRIC_MONITOR_NAME
        logger.info(
            "Starting metric monitor with name '{}'".format(monitor_name))
        monitor_factory = MetricMonitor.options(
            detached=True, name=monitor_name)
        self.metric_monitor = monitor_factory.remote(gc_window_s)

        # TODO(edoakes): move these into the constructor.
        monitor = self.metric_monitor
        start_metric_monitor_loop.remote(monitor)
        monitor.add_target.remote(self.router)
def test_metric_system(ray_instance, start_target_actor):
    """Scrape a target once and verify the collected counter and percentiles."""
    monitor = MetricMonitor.remote()
    ray.get(monitor.add_target.remote(start_target_actor))

    # A single scrape provides the data that collect() aggregates below.
    ray.get(monitor.scrape.remote())

    # Request the 50th/90th/95th percentiles over a 60-second window.
    collect_ref = monitor.collect.remote([50, 90, 95], [60])
    collected = ray.get(collect_ref)

    # The counter is compared against the value reported by the actor itself.
    counter_ref = start_target_actor.get_counter_value.remote()
    actual_counter = ray.get(counter_ref)

    expected = {
        "counter": actual_counter,
        "latency_list_50th_perc_60_window": 50.0,
        "latency_list_90th_perc_60_window": 90.0,
        "latency_list_95th_perc_60_window": 95.0,
    }
    assert collected == expected
def start_metric_monitor(self, gc_window_seconds):
    """Create the metric monitor actor and hook it up to the router.

    Args:
        gc_window_seconds: Forwarded as the only argument to the
            MetricMonitor constructor.

    Raises:
        AssertionError: If ``self.metric_monitor`` is already set.
    """
    assert self.metric_monitor is None, "Metric monitor already started."

    monitor = MetricMonitor.remote(gc_window_seconds)
    self.metric_monitor = monitor

    # TODO(edoakes): this should be an actor method, not a separate task.
    start_metric_monitor_loop.remote(monitor)
    monitor.add_target.remote(self.router)