def test_ambiguous_data(
    self, caplog, classifier_class, training_data, component_config, **kwargs
):
    json_data = {
        "rasa_nlu_data": {
            "common_examples": [
                {"text": "good", "intent": "affirm", "entities": []},
                {"text": "good morning", "intent": "greet", "entities": []},
                {"text": "see you", "intent": "goodbye", "entities": []},
                {"text": "nice to see you", "intent": "greet", "entities": []},
            ]
        }
    }
    rasa_reader = RasaReader()
    data = rasa_reader.read_from_json(json_data)

    # "good" (affirm) is contained in "good morning" (greet) and "see you"
    # (goodbye) in "nice to see you" (greet), so two ambiguity warnings are expected.
    with pytest.warns(UserWarning) as record:
        self._train_classifier(classifier_class, data, component_config, **kwargs)
    assert len(record) == 2

def test_identical_data(
    self, caplog, classifier_class, training_data, component_config, **kwargs
):
    json_data = {
        "rasa_nlu_data": {
            "common_examples": [
                {"text": "good", "intent": "affirm", "entities": []},
                {"text": "good", "intent": "goodbye", "entities": []},
            ]
        }
    }
    rasa_reader = RasaReader()
    data = rasa_reader.read_from_json(json_data)

    # identical texts mapped to different intents should raise one duplicate warning
    with pytest.warns(UserWarning) as record:
        self._train_classifier(classifier_class, data, component_config, **kwargs)
    assert len(record) == 1
    assert (
        "Remove (one of) the duplicates from the training data."
        in record[0].message.args[0]
    )

def test_identical_data_logging(
    self, caplog, classifier_class, training_data, component_config, **kwargs
):
    json_data = {
        "rasa_nlu_data": {
            "common_examples": [
                {"text": "good", "intent": "affirm", "entities": []},
                {"text": "good", "intent": "goodbye", "entities": []},
            ]
        }
    }
    rasa_reader = RasaReader()
    data = rasa_reader.read_from_json(json_data)

    # capture DEBUG output on the keyword intent classifier's logger; the
    # duplicate example should produce exactly one log record during training
    with caplog.at_level(
        logging.DEBUG, logger="rasa.nlu.classifiers.keyword_intent_classifier"
    ):
        self._train_classifier(classifier_class, data, component_config, **kwargs)
    assert len(caplog.records) == 1

def test_dump_trainable_entities(
    entity_extractor: Optional[Text], expected_output: Text
):
    training_data_json = {
        "rasa_nlu_data": {
            "common_examples": [
                {
                    "text": "test",
                    "intent": "greet",
                    "entities": [
                        {"start": 0, "end": 4, "value": "random", "entity": "word"}
                    ],
                }
            ]
        }
    }
    if entity_extractor is not None:
        training_data_json["rasa_nlu_data"]["common_examples"][0]["entities"][0][
            "extractor"
        ] = entity_extractor

    training_data_object = RasaReader().read_from_json(training_data_json)
    md_dump = MarkdownWriter().dumps(training_data_object)
    # line 0 of the dump is the intent header; line 1 holds the dumped example text
    assert md_dump.splitlines()[1] == expected_output

def test_valid_data(
    self, caplog, classifier_class, training_data, component_config, **kwargs
):
    json_data = {
        "rasa_nlu_data": {
            "common_examples": [
                {"text": "good", "intent": "affirm", "entities": []},
                {"text": "bye", "intent": "goodbye", "entities": []},
                {"text": "see ya", "intent": "goodbye", "entities": []},
                {"text": "yes", "intent": "affirm", "entities": []},
                {"text": "ciao", "intent": "goodbye", "entities": []},
            ]
        }
    }
    rasa_reader = RasaReader()
    data = rasa_reader.read_from_json(json_data)

    # clean, non-overlapping examples should train without any warnings
    with pytest.warns(None) as record:
        self._train_classifier(classifier_class, data, component_config, **kwargs)
    assert len(record) == 0
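
# `_train_classifier` is defined elsewhere in the test class and is not part of
# this excerpt. The sketch below is only an assumption of what such a helper
# could look like for a Rasa 1.x-style component: instantiate the classifier
# under test with its component config and call `train()` on the parsed
# training data. The wrapper class name and the exact `train()` call are
# illustrative, not the project's actual code.
class _TrainClassifierHelperSketch:
    def _train_classifier(self, classifier_class, data, component_config, **kwargs):
        """Instantiate the component under test and train it on `data`."""
        classifier = classifier_class(component_config)
        # Rasa 1.x components take (training_data, config, **kwargs); passing
        # None for the config is accepted by e.g. KeywordIntentClassifier.
        classifier.train(data, None, **kwargs)
        return classifier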