def _run_markers(
    seed: Optional[int],
    count: Optional[int],
    endpoint_config: Path,
    domain_path: Optional[Text],
    strategy: Text,
    config: Path,
    output_filename: Path,
    stats_file_prefix: Optional[Path] = None,
) -> None:
    """Extract markers from a tracker store according to the given configuration.

    Args:
        seed: (Optional) Seed for the random number generator, used by the
            'sample' extraction strategy.
        count: (Optional) Number of trackers to extract from (applies to every
            strategy except 'all').
        endpoint_config: Path to the endpoint configuration defining which
            tracker store to read from.
        domain_path: Path to the domain specification used to validate the
            marker definitions.
        strategy: Name of the strategy for selecting trackers to extract from.
        config: Path to the markers definition file.
        output_filename: Destination path for the extracted markers.
        stats_file_prefix: (Optional) Prefix (directory plus common file stem,
            e.g. '<path-to-stats-folder>/statistics') under which statistics
            files are written: overall statistics go to
            '<path-to-stats-folder>/statistics-overall.csv' and per-session
            statistics to '<path-to-stats-folder>/statistics-per-session.csv'.
    """
    telemetry.track_markers_extraction_initiated(
        strategy=strategy,
        only_extract=stats_file_prefix is not None,
        seed=seed is not None,
        count=count,
    )

    domain = Domain.load(domain_path) if domain_path else None
    markers = Marker.from_path(config)

    # Abort before extraction if the marker definitions do not match the
    # domain; detailed errors are printed by the validation itself.
    if domain and not markers.validate_against_domain(domain):
        rasa.shared.utils.cli.print_error_and_exit(
            "Validation errors were found in the markers definition. "
            "Please see errors listed above and fix before running again."
        )

    # Telemetry bookkeeping: the loaded markers are combined under one
    # virtual OR, so that synthetic level is subtracted from the depth.
    num_markers = len(markers.sub_markers)
    max_depth = markers.max_depth() - 1
    operator_widths = [
        len(sub_marker.sub_markers)
        for marker in markers.sub_markers
        for sub_marker in marker.flatten()
        if isinstance(sub_marker, OperatorMarker)
    ]
    branching_factor = max(operator_widths, default=0)
    telemetry.track_markers_parsed_count(num_markers, max_depth, branching_factor)

    tracker_loader = _create_tracker_loader(
        endpoint_config, strategy, domain, count, seed
    )

    def _with_suffix(base: Optional[Path], suffix: Text) -> Optional[Path]:
        # Both statistics files share the prefix and differ only by suffix.
        if base is None:
            return None
        return base.parent / (base.name + suffix)

    try:
        markers.evaluate_trackers(
            trackers=tracker_loader.load(),
            output_file=output_filename,
            session_stats_file=_with_suffix(stats_file_prefix, STATS_SESSION_SUFFIX),
            overall_stats_file=_with_suffix(stats_file_prefix, STATS_OVERALL_SUFFIX),
        )
    except (FileExistsError, NotADirectoryError) as e:
        rasa.shared.utils.cli.print_error_and_exit(message=str(e))
async def test_events_schema(
    monkeypatch: MonkeyPatch, default_agent: Agent, config_path: Text
):
    """Fire every known telemetry event and validate each against events.json."""
    # Telemetry debug mode prints events instead of sending them; patching the
    # printer lets us capture every reported event for schema validation.
    monkeypatch.setenv("RASA_TELEMETRY_DEBUG", "true")
    monkeypatch.setenv("RASA_TELEMETRY_ENABLED", "true")
    captured = Mock()
    monkeypatch.setattr(telemetry, "print_telemetry_event", captured)

    with open(TELEMETRY_EVENTS_JSON) as schema_file:
        schemas = json.load(schema_file)["events"]

    tasks_before = asyncio.all_tasks()

    # Generate all known backend telemetry events, and then use events.json to
    # validate their schema.
    training_data = TrainingDataImporter.load_from_config(config_path)
    with telemetry.track_model_training(training_data, "rasa"):
        await asyncio.sleep(1)

    telemetry.track_telemetry_disabled()
    telemetry.track_data_split(0.5, "nlu")
    telemetry.track_validate_files(True)
    telemetry.track_data_convert("yaml", "nlu")
    telemetry.track_tracker_export(5, TrackerStore(domain=None), EventBroker())
    telemetry.track_interactive_learning_start(True, False)
    telemetry.track_server_start([CmdlineInput()], None, None, 42, True)
    telemetry.track_project_init("tests/")
    telemetry.track_shell_started("nlu")
    telemetry.track_rasa_x_local()
    telemetry.track_visualization()
    telemetry.track_core_model_test(5, True, default_agent)
    telemetry.track_nlu_model_test(TrainingData())
    telemetry.track_markers_extraction_initiated("all", False, False, None)
    telemetry.track_markers_extracted(1)
    telemetry.track_markers_stats_computed(1)
    telemetry.track_markers_parsed_count(1, 1, 1)

    # Telemetry is reported asynchronously; wait only for the tasks spawned
    # since the snapshot above.
    await asyncio.gather(*(asyncio.all_tasks() - tasks_before))

    assert captured.call_count == 19

    for call_args, _ in captured.call_args_list:
        event = call_args[0]
        # `metrics_id` automatically gets added to all event but is
        # not part of the schema so we need to remove it before validation
        del event["properties"]["metrics_id"]
        jsonschema.validate(
            instance=event["properties"], schema=schemas[event["event"]]
        )