def test_largebuffer(self):
    """
    Adding past the int-overflow boundary through a threadsafe shared child
    must survive a flush and be visible on the parent.
    """
    # intentionally just over the int overflow
    overflow_count = 32768 + 1
    parent = Metrics(threadsafe=True)
    child = Metrics(threadsafe=True, shared=parent.share())
    for _ in range(overflow_count):
        child.add('key', SumMetric(1))
    child.flush()
    assert parent.report()['key'] == overflow_count
def test_largebuffer(self):
    # legacy test. left as just another test
    parent = Metrics()
    child = Metrics(shared=parent.share())
    # intentionally just over the int overflow
    overflow_count = 32768 + 1
    for _ in range(overflow_count):
        child.add('key', SumMetric(1))
    assert parent.report()['key'] == overflow_count
def test_simpleadd(self):
    """
    Sums accumulate for ints and floats, and clear() empties the report.
    """
    metrics = Metrics()
    for value in (1, 2):
        metrics.add('key', SumMetric(value))
    assert metrics.report()['key'] == 3
    metrics.clear()
    assert 'key' not in metrics.report()
    for value in (1.5, 2.5):
        metrics.add('key', SumMetric(value))
    assert metrics.report()['key'] == 4.0
def test_simpleadd(self):
    """
    Same accumulation semantics with threadsafe explicitly off; flush() must
    be a harmless no-op.
    """
    metrics = Metrics(threadsafe=False)
    for value in (1, 2):
        metrics.add('key', SumMetric(value))
    assert metrics.report()['key'] == 3
    metrics.clear()
    assert 'key' not in metrics.report()
    for value in (1.5, 2.5):
        metrics.add('key', SumMetric(value))
    assert metrics.report()['key'] == 4.0
    # shouldn't throw exception
    metrics.flush()
def test_recent(self):
    """
    report_recent()/clear_recent() are per-instance windows, while report()
    reflects the shared global totals across all sharers.
    """
    main = Metrics()
    shadow = Metrics(shared=main.share())

    main.add('test', SumMetric(1))
    assert main.report() == {'test': 1}
    assert main.report_recent() == {'test': 1}

    main.clear_recent()
    main.add('test', SumMetric(2))
    assert main.report() == {'test': 3}
    assert main.report_recent() == {'test': 2}
    # the shared instance sees the total, but has its own empty recent window
    assert shadow.report() == {'test': 3}
    assert shadow.report_recent() == {}

    shadow.add('test', SumMetric(3))
    assert shadow.report() == {'test': 6}
    assert main.report() == {'test': 6}
    assert shadow.report_recent() == {'test': 3}
    assert main.report_recent() == {'test': 2}

    # clearing one instance's recent window leaves the other's intact
    shadow.clear_recent()
    assert shadow.report() == {'test': 6}
    assert main.report() == {'test': 6}
    assert shadow.report_recent() == {}
    assert main.report_recent() == {'test': 2}

    main.clear_recent()
    assert shadow.report() == {'test': 6}
    assert main.report() == {'test': 6}
    assert main.report_recent() == {}
def test_multithreaded(self):
    # legacy test, but left because it's just another test
    root = Metrics()
    worker_a = Metrics(shared=root.share())
    worker_b = Metrics(shared=root.share())
    worker_a.add('key', SumMetric(1))
    worker_b.add('key', SumMetric(2))
    root.add('key', SumMetric(3))
    assert root.report()['key'] == 6
def test_shared(self):
    """
    Metrics added through any sharer are aggregated into the parent's report.
    """
    root = Metrics()
    sharer_one = Metrics(shared=root.share())
    sharer_two = Metrics(shared=root.share())
    sharer_one.add('key', SumMetric(1))
    sharer_two.add('key', SumMetric(2))
    root.add('key', SumMetric(3))
    assert root.report()['key'] == 6
def test_multithreaded(self):
    """
    With threadsafe=True, each sharer must flush() before its additions are
    visible in the parent's aggregated report.
    """
    m = Metrics(threadsafe=True)
    m2 = Metrics(threadsafe=True, shared=m.share())
    m3 = Metrics(threadsafe=True, shared=m.share())
    m2.add('key', SumMetric(1))
    m2.flush()
    m3.add('key', SumMetric(2))
    m3.flush()
    m.add('key', SumMetric(3))
    m.flush()
    # BUG FIX: the original line was `m.report()['key'] == 6` with no
    # `assert`, so the comparison result was silently discarded and this
    # test could never fail on the final check.
    assert m.report()['key'] == 6
def test_shared(self):
    """
    With threadsafe=False, flush() is a no-op and aggregation happens
    directly; calling flush() anywhere must still be safe.
    """
    root = Metrics(threadsafe=False)
    sharer_one = Metrics(threadsafe=False, shared=root.share())
    sharer_two = Metrics(threadsafe=False, shared=root.share())
    sharer_one.add('key', SumMetric(1))
    sharer_two.add('key', SumMetric(2))
    # just make sure this doesn't throw exception, it's a no-op
    sharer_one.flush()
    root.add('key', SumMetric(3))
    assert root.report()['key'] == 6
    # shouldn't throw exception
    root.flush()
    sharer_one.flush()
    sharer_two.flush()
class TodMetrics(Metrics):
    """
    Helper container which encapsulates TOD metrics and does some basic preprocessing
    to handlers to calculate said metrics.

    This class should generally not need to be changed; add new metrics handlers to
    `WORLD_METRIC_HANDLERS` (or otherwise override `self.handlers` of this class) to
    change metrics actively being used.
    """

    def __init__(self, shared: Dict[str, Any] = None) -> None:
        super().__init__(shared=shared)
        # One instance of every registered handler class; each handler may
        # contribute per-message and per-episode metrics.
        self.handlers = [x() for x in WORLD_METRIC_HANDLERS]
        # Becomes True once a real goal is observed; gates episode metrics
        # collection in `episode_reset`.
        self.convo_started = False
        # Metrics of the most recently completed episode (see `episode_reset`).
        self.last_episode_metrics = Metrics()

    def handle_message(self, message: Message, agent_type: TodAgentType):
        """
        Dispatch `message` to every handler and accumulate any metrics returned.

        Messages without a "text" field are ignored.
        """
        if "text" not in message:
            return
        if agent_type == TodAgentType.GOAL_GROUNDING_AGENT and len(
            message["text"]
        ) > len(STANDARD_GOAL):
            # Only count a conversation as started if there is a goal.
            self.convo_started = True
        for handler in self.handlers:
            metrics = self._handle_message_impl(message, agent_type, handler)
            if metrics is not None:
                for name, metric in metrics.items():
                    if metric is not None:
                        self.add(name, metric)

    def _handle_message_impl(
        self,
        message: Message,
        agent_type: TodAgentType,
        handler: world_metrics_handlers.TodMetricsHandler,
    ):
        """
        Deserialize the message text for `agent_type` and call the matching
        handler hook; returns whatever dict of metrics the handler produces.
        """
        # NOTE(review): `str.replace` removes *all* occurrences of the agent
        # prefix, not just a leading one — presumably the prefix only ever
        # appears once at the start of the text; confirm against the
        # serialization format.
        prefix_stripped_text = message["text"].replace(
            TOD_AGENT_TYPE_TO_PREFIX[agent_type], ""
        )
        if agent_type is TodAgentType.API_SCHEMA_GROUNDING_AGENT:
            return handler.handle_api_schemas(
                message, SerializationHelpers.str_to_api_schemas(prefix_stripped_text)
            )
        if agent_type is TodAgentType.GOAL_GROUNDING_AGENT:
            return handler.handle_goals(
                message, SerializationHelpers.str_to_goals(prefix_stripped_text)
            )
        if agent_type is TodAgentType.USER_UTT_AGENT:
            return handler.handle_user_utt(message, prefix_stripped_text)
        if agent_type is TodAgentType.API_CALL_AGENT:
            return handler.handle_api_call(
                message, SerializationHelpers.str_to_api_dict(prefix_stripped_text)
            )
        if agent_type is TodAgentType.API_RESP_AGENT:
            return handler.handle_api_resp(
                message, SerializationHelpers.str_to_api_dict(prefix_stripped_text)
            )
        if agent_type is TodAgentType.SYSTEM_UTT_AGENT:
            return handler.handle_sys_utt(message, prefix_stripped_text)

    def get_last_episode_metrics(self):
        """
        This is a bit of a hack so that we can report whether or not a convo has
        successfully hit all goals and associate this with each episode for the
        purposes of doing filtering.
        """
        return self.last_episode_metrics

    def episode_reset(self):
        """
        Collect per-episode metrics from every handler, reset the handlers, and
        stash an episode-local copy in `self.last_episode_metrics`.

        If the conversation never started (no goal seen), `last_episode_metrics`
        is left as None and handlers are not drained.
        """
        self.last_episode_metrics = None
        if self.convo_started:
            self.last_episode_metrics = Metrics()
            for handler in self.handlers:
                metrics = handler.get_episode_metrics()
                handler.episode_reset()
                if metrics is not None:
                    for name, metric in metrics.items():
                        if metric is not None:
                            # Recorded both in the global aggregate and in the
                            # episode-local snapshot.
                            self.add(name, metric)
                            self.last_episode_metrics.add(name, metric)
            self.convo_started = False