def evaluate_trackers(
    self,
    trackers: Iterator[Optional[DialogueStateTracker]],
    output_file: Path,
    session_stats_file: Optional[Path] = None,
    overall_stats_file: Optional[Path] = None,
) -> None:
    """Collect markers for each dialogue in each tracker loaded.

    Args:
        trackers: An iterator over the trackers from which we want to
            extract markers.
        output_file: Path to write out the extracted markers.
        session_stats_file: (Optional) Path to write out statistics about
            the extracted markers for each session separately.
        overall_stats_file: (Optional) Path to write out statistics about
            the markers extracted from all session data.

    Raises:
        `FileExistsError` if any of the specified files already exists
        `NotADirectoryError` if any of the specified files is supposed to be
            contained in a directory that does not exist
    """
    # Validate every output location up front so we fail fast, before the
    # (potentially expensive) pass over all trackers.
    for candidate in (session_stats_file, overall_stats_file, output_file):
        if candidate is None:
            continue
        if candidate.is_file():
            raise FileExistsError(f"Expected that no file {candidate} already exists.")
        if not candidate.parent.is_dir():
            raise NotADirectoryError(f"Expected directory {candidate.parent} to exist.")

    # Run the markers over every session of every (non-None) tracker and
    # persist the per-sender results.
    extracted: Dict[Text, List[SessionEvaluation]] = {
        tracker.sender_id: self.evaluate_events(tracker.events)
        for tracker in trackers
        if tracker
    }
    num_trackers = len(extracted)
    telemetry.track_markers_extracted(num_trackers)
    Marker._save_results(output_file, extracted)

    # Compute and write statistics if requested.
    if session_stats_file or overall_stats_file:
        from rasa.core.evaluation.marker_stats import MarkerStatistics

        statistics = MarkerStatistics()
        for sender_id, sessions in extracted.items():
            for session_idx, session_result in enumerate(sessions):
                statistics.process(
                    sender_id=sender_id,
                    session_idx=session_idx,
                    meta_data_on_relevant_events_per_marker=session_result,
                )
        telemetry.track_markers_stats_computed(num_trackers)
        if overall_stats_file:
            statistics.overall_statistic_to_csv(path=overall_stats_file)
        if session_stats_file:
            statistics.per_session_statistics_to_csv(path=session_stats_file)
async def test_events_schema(
    monkeypatch: MonkeyPatch, default_agent: Agent, config_path: Text
):
    """Fires every known backend telemetry event and validates each payload
    against the schemas declared in the telemetry events JSON file.
    """
    # In debug mode telemetry is printed via `print_telemetry_event` instead
    # of being sent; patching that function lets us capture every reported
    # event for inspection.
    monkeypatch.setenv("RASA_TELEMETRY_DEBUG", "true")
    monkeypatch.setenv("RASA_TELEMETRY_ENABLED", "true")

    capture = Mock()
    monkeypatch.setattr(telemetry, "print_telemetry_event", capture)

    with open(TELEMETRY_EVENTS_JSON) as schema_file:
        schemas = json.load(schema_file)["events"]

    # Remember the tasks that already exist so we can later await only the
    # ones spawned by the telemetry calls below.
    tasks_before = asyncio.all_tasks()

    # Generate all known backend telemetry events, and then use events.json to
    # validate their schema.
    training_data = TrainingDataImporter.load_from_config(config_path)
    with telemetry.track_model_training(training_data, "rasa"):
        await asyncio.sleep(1)

    telemetry.track_telemetry_disabled()

    telemetry.track_data_split(0.5, "nlu")

    telemetry.track_validate_files(True)

    telemetry.track_data_convert("yaml", "nlu")

    telemetry.track_tracker_export(5, TrackerStore(domain=None), EventBroker())

    telemetry.track_interactive_learning_start(True, False)

    telemetry.track_server_start([CmdlineInput()], None, None, 42, True)

    telemetry.track_project_init("tests/")

    telemetry.track_shell_started("nlu")

    telemetry.track_rasa_x_local()

    telemetry.track_visualization()

    telemetry.track_core_model_test(5, True, default_agent)

    telemetry.track_nlu_model_test(TrainingData())

    telemetry.track_markers_extraction_initiated("all", False, False, None)

    telemetry.track_markers_extracted(1)

    telemetry.track_markers_stats_computed(1)

    telemetry.track_markers_parsed_count(1, 1, 1)

    outstanding = asyncio.all_tasks() - tasks_before
    await asyncio.gather(*outstanding)

    assert capture.call_count == 19

    for positional_args, _kwargs in capture.call_args_list:
        event = positional_args[0]
        # `metrics_id` automatically gets added to all event but is
        # not part of the schema so we need to remove it before validation
        del event["properties"]["metrics_id"]

        jsonschema.validate(instance=event["properties"], schema=schemas[event["event"]])