async def test_events_schema( monkeypatch: MonkeyPatch, default_agent: Agent, config_path: Text ): # this allows us to patch the printing part used in debug mode to collect the # reported events monkeypatch.setenv("RASA_TELEMETRY_DEBUG", "true") monkeypatch.setenv("RASA_TELEMETRY_ENABLED", "true") mock = Mock() monkeypatch.setattr(telemetry, "print_telemetry_event", mock) with open(TELEMETRY_EVENTS_JSON) as f: schemas = json.load(f)["events"] initial = asyncio.all_tasks() # Generate all known backend telemetry events, and then use events.json to # validate their schema. training_data = TrainingDataImporter.load_from_config(config_path) with telemetry.track_model_training(training_data, "rasa"): await asyncio.sleep(1) telemetry.track_telemetry_disabled() telemetry.track_data_split(0.5, "nlu") telemetry.track_validate_files(True) telemetry.track_data_convert("yaml", "nlu") telemetry.track_tracker_export(5, TrackerStore(domain=None), EventBroker()) telemetry.track_interactive_learning_start(True, False) telemetry.track_server_start([CmdlineInput()], None, None, 42, True) telemetry.track_project_init("tests/") telemetry.track_shell_started("nlu") telemetry.track_rasa_x_local() telemetry.track_visualization() telemetry.track_core_model_test(5, True, default_agent) telemetry.track_nlu_model_test(TrainingData()) pending = asyncio.all_tasks() - initial await asyncio.gather(*pending) assert mock.call_count == 15 for args, _ in mock.call_args_list: event = args[0] # `metrics_id` automatically gets added to all event but is # not part of the schema so we need to remove it before validation del event["properties"]["metrics_id"] jsonschema.validate( instance=event["properties"], schema=schemas[event["event"]] )
def split_nlu_data(args: argparse.Namespace) -> None: """Load data from a file path and split the NLU data into test and train examples. Args: args: Commandline arguments """ data_path = rasa.cli.utils.get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH) data_path = rasa.shared.data.get_nlu_directory(data_path) nlu_data = rasa.shared.nlu.training_data.loading.load_data(data_path) extension = rasa.shared.nlu.training_data.util.get_file_format_extension(data_path) train, test = nlu_data.train_test_split(args.training_fraction, args.random_seed) train.persist(args.out, filename=f"training_data{extension}") test.persist(args.out, filename=f"test_data{extension}") telemetry.track_data_split(args.training_fraction, "nlu")