async def test_train_model_empty_pipeline(component_builder):
    """Training without a pipeline template must raise a ValueError."""
    config = utilities.base_test_conf(pipeline_template=None)

    with pytest.raises(ValueError):
        await train(
            config, data=DEFAULT_DATA_PATH, component_builder=component_builder
        )
async def test_random_seed(component_builder, tmpdir):
    """Two tf-embedding training runs with the same fixed seed must yield
    identical intent confidences."""
    config = utilities.base_test_conf("supervised_embeddings")
    # pin the embedding intent classifier's random seed so both runs
    # are fully deterministic
    config.set_component_attr(6, random_seed=1)

    confidences = []
    for suffix in ("_a", "_b"):
        _, _, persisted_path = await train(
            config,
            path=tmpdir.strpath + suffix,
            data=DEFAULT_DATA_PATH,
            component_builder=component_builder,
        )
        interpreter = Interpreter.load(persisted_path, component_builder)
        confidences.append(interpreter.parse("hello")["intent"]["confidence"])

    assert confidences[0] == confidences[1]
def test_unintentional_synonyms_capitalized(component_builder):
    """The synonym mapper must not map a mere case-variant of an entity
    value ("mexican" vs "Mexican") as a synonym, while a genuinely
    different surface form ("tacos") is mapped."""
    config = utilities.base_test_conf("pretrained_embeddings_spacy")
    mapper = component_builder.create_component(config.for_component(5), config)

    def _example(text, start, end):
        # both examples share the same intent and entity value
        return Message(
            text,
            {
                "intent": "restaurant_search",
                "entities": [
                    {
                        "start": start,
                        "end": end,
                        "value": "Mexican",
                        "entity": "cuisine",
                    }
                ],
            },
        )

    training = [
        _example("Any Mexican restaurant will do", 4, 11),
        _example("I want Tacos!", 7, 12),
    ]
    mapper.train(TrainingData(training_examples=training), config)

    # case variant of the value itself -> no synonym entry
    assert mapper.synonyms.get("mexican") is None
    # different surface form -> mapped to the canonical value
    assert mapper.synonyms.get("tacos") == "Mexican"
async def test_handles_pipeline_with_non_existing_component(component_builder):
    """Training with an unknown component name must fail with a
    descriptive error.

    Bug fix: `train` is a coroutine (it is awaited by every other caller
    in this module). The original synchronous call merely created a
    coroutine object without running it, so `pytest.raises` could never
    observe the exception — the test is now async and awaits the call.
    """
    _config = utilities.base_test_conf("pretrained_embeddings_spacy")
    _config.pipeline.append({"name": "my_made_up_component"})

    with pytest.raises(Exception) as execinfo:
        await train(
            _config, data=DEFAULT_DATA_PATH, component_builder=component_builder
        )
    assert "Failed to find component" in str(execinfo.value)
async def test_interpreter_on_pipeline_templates(
    pipeline_template, component_builder, tmpdir
):
    """Smoke-test parsing a couple of utterances with an interpreter
    built from each pipeline template."""
    test_data = "data/examples/rasa/demo-rasa.json"

    config = utilities.base_test_conf(pipeline_template)
    config["data"] = test_data
    td = training_data.load_data(test_data)

    interpreter = await utilities.interpreter_for(
        component_builder, "data/examples/rasa/demo-rasa.json", tmpdir.strpath, config
    )

    for text in ("good bye", "i am looking for an indian spot"):
        parsed = interpreter.parse(text, time=None)
        assert parsed["text"] == text

        intent_name = parsed["intent"]["name"]
        assert not intent_name or intent_name in td.intents
        assert parsed["intent"]["confidence"] >= 0

        # Models on our test data set are not stable enough to require
        # the exact entities to be found, but every detected entity type
        # must at least exist in the training data.
        for entity in parsed["entities"]:
            assert entity["entity"] in td.entities
async def test_train_named_model(component_builder, tmpdir):
    """A trained model must be persisted in a directory directly under
    the requested project path."""
    config = utilities.base_test_conf("keyword")

    trained, _, persisted_path = await train(
        config,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
    )
    assert trained.pipeline

    # the model dir itself is named after the project; its parent must
    # be exactly the path we passed in
    assert os.path.dirname(os.path.normpath(persisted_path)) == tmpdir.strpath
async def test_train_model(pipeline_template, component_builder, tmpdir):
    """Train each pipeline template, persist the model, reload it and
    parse a couple of messages.

    Bug fix: `train` is a coroutine (every other caller in this module
    awaits it). The original synchronous call would have tried to unpack
    an un-awaited coroutine object into a 3-tuple and failed — the test
    is now async and awaits the call.
    """
    _config = utilities.base_test_conf(pipeline_template)

    (trained, _, persisted_path) = await train(
        _config,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
    )
    assert trained.pipeline

    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
async def test_train_model_training_data_persisted(component_builder, tmpdir):
    """With `persist_nlu_training_data=True` the training data must end
    up in the persisted model's metadata."""
    config = utilities.base_test_conf("keyword")

    trained, _, persisted_path = await train(
        config,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
        persist_nlu_training_data=True,
    )
    assert trained.pipeline

    reloaded = Interpreter.load(persisted_path, component_builder)
    assert reloaded.pipeline
    assert reloaded.model_metadata.get("training_data") is not None
async def test_interpreter(pipeline_template, component_builder, tmpdir):
    """Parse sample texts with an interpreter built for each template.

    Bug fix: `utilities.interpreter_for` is a coroutine (it is awaited in
    `test_interpreter_on_pipeline_templates`). The original synchronous
    call handed back an un-awaited coroutine instead of an interpreter —
    the test is now async and awaits the helper.
    """
    test_data = "data/examples/rasa/demo-rasa.json"
    _conf = utilities.base_test_conf(pipeline_template)
    _conf["data"] = test_data
    td = training_data.load_data(test_data)

    interpreter = await utilities.interpreter_for(
        component_builder, "data/examples/rasa/demo-rasa.json", tmpdir.strpath, _conf
    )

    for text in ["good bye", "i am looking for an indian spot"]:
        result = interpreter.parse(text, time=None)
        assert result["text"] == text
        assert not result["intent"]["name"] or result["intent"]["name"] in td.intents
        assert result["intent"]["confidence"] >= 0
        # Ensure the model doesn't detect entity types that are not present.
        # Models on our test data set are not stable enough to require the
        # exact entities to be found.
        for entity in result["entities"]:
            assert entity["entity"] in td.entities
def test_whitespace_training():
    """Training a case-insensitive WhitespaceTokenizer must attach
    lowercased tokens to every training example."""
    config = utilities.base_test_conf("supervised_embeddings")

    def _example(text, start, end):
        return Message(
            text,
            {
                "intent": "restaurant_search",
                "entities": [
                    {
                        "start": start,
                        "end": end,
                        "value": "Mexican",
                        "entity": "cuisine",
                    }
                ],
            },
        )

    examples = [
        _example("Any Mexican restaurant will do", 4, 11),
        _example("I want Tacos!", 7, 12),
    ]

    tokenizer = WhitespaceTokenizer({"case_sensitive": False})
    tokenizer.train(TrainingData(training_examples=examples), config)

    expected_tokens = [
        ["any", "mexican", "restaurant", "will", "do"],
        ["i", "want", "tacos"],
    ]
    for message, expected in zip(examples, expected_tokens):
        for index, token_text in enumerate(expected):
            assert message.data.get("tokens")[index].text == token_text
def test_whitespace_with_case():
    """WhitespaceTokenizer must honour `case_sensitive` in tokenize(),
    process() and train(), defaulting to case-sensitive when unset."""
    from rasa.nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer

    def _tokens(component_config, text):
        return [t.text for t in WhitespaceTokenizer(component_config).tokenize(text)]

    # explicit lowercase, explicit passthrough, and the default behaviour
    assert _tokens({"case_sensitive": False}, "Forecast for LUNCH") == [
        "forecast",
        "for",
        "lunch",
    ]
    assert _tokens({"case_sensitive": True}, "Forecast for LUNCH") == [
        "Forecast",
        "for",
        "LUNCH",
    ]
    assert _tokens({}, "Forecast for LUNCH") == ["Forecast", "for", "LUNCH"]

    # process() applies the same lowercasing to a Message
    tokenizer = WhitespaceTokenizer({"case_sensitive": False})
    message = Message("Forecast for LUNCH")
    tokenizer.process(message)
    for index, token_text in enumerate(["forecast", "for", "lunch"]):
        assert message.data.get("tokens")[index].text == token_text

    # train() lowercases the tokens attached to the training examples
    _config = utilities.base_test_conf("supervised_embeddings")

    def _example(text, start, end):
        return Message(
            text,
            {
                "intent": "restaurant_search",
                "entities": [
                    {
                        "start": start,
                        "end": end,
                        "value": "Mexican",
                        "entity": "cuisine",
                    }
                ],
            },
        )

    examples = [
        _example("Any Mexican restaurant will do", 4, 11),
        _example("I want Tacos!", 7, 12),
    ]
    tokenizer = WhitespaceTokenizer({"case_sensitive": False})
    tokenizer.train(TrainingData(training_examples=examples), _config)

    expected_tokens = [
        ["any", "mexican", "restaurant", "will", "do"],
        ["i", "want", "tacos"],
    ]
    for example, expected in zip(examples, expected_tokens):
        for index, token_text in enumerate(expected):
            assert example.data.get("tokens")[index].text == token_text