def test_ambiguous_data(self, caplog, classifier_class, training_data,
                        component_config, **kwargs):
        """Train on overlapping example texts and expect two ``UserWarning``s.

        The examples contain texts that are substrings of other texts mapped
        to a different intent ("good" / "good morning" and "see you" /
        "nice to see you"). Training must emit exactly two ``UserWarning``s.

        ``caplog`` and ``training_data`` are accepted as fixtures but are not
        used by this test.
        """
        # (text, intent) pairs; every example carries an empty entity list.
        samples = [
            ("good", "affirm"),
            ("good morning", "greet"),
            ("see you", "goodbye"),
            ("nice to see you", "greet"),
        ]
        payload = {
            "rasa_nlu_data": {
                "common_examples": [
                    {"text": text, "intent": intent, "entities": []}
                    for text, intent in samples
                ]
            }
        }
        parsed = RasaReader().read_from_json(payload)

        with pytest.warns(UserWarning) as caught:
            self._train_classifier(
                classifier_class, parsed, component_config, **kwargs
            )
        assert len(caught) == 2
    def test_identical_data(self, caplog, classifier_class, training_data,
                            component_config, **kwargs):
        """Train on one text mapped to two intents and expect one warning.

        The same text ("good") appears with the intents "affirm" and
        "goodbye". Training must emit exactly one ``UserWarning`` whose
        message tells the user to remove the duplicate.

        ``caplog`` and ``training_data`` are accepted as fixtures but are not
        used by this test.
        """
        expected_hint = "Remove (one of) the duplicates from the training data."

        # Same text, two different intents.
        samples = [("good", "affirm"), ("good", "goodbye")]
        payload = {
            "rasa_nlu_data": {
                "common_examples": [
                    {"text": text, "intent": intent, "entities": []}
                    for text, intent in samples
                ]
            }
        }
        parsed = RasaReader().read_from_json(payload)

        with pytest.warns(UserWarning) as caught:
            self._train_classifier(
                classifier_class, parsed, component_config, **kwargs
            )

        assert len(caught) == 1
        first_text = caught[0].message.args[0]
        assert expected_hint in first_text
Exemple #3
0
    def test_identical_data(self, caplog, classifier_class, training_data,
                            component_config, **kwargs):
        """Train on one text mapped to two intents and expect one log record.

        The same text ("good") appears with the intents "affirm" and
        "goodbye". While the named logger is set to DEBUG, training must
        leave exactly one record in ``caplog``.

        ``training_data`` is accepted as a fixture but is not used here.
        """
        # NOTE(review): this logger name looks misspelled (double underscore,
        # "classifer") -- confirm against the real module path. It is kept
        # verbatim because changing it may alter which records are captured.
        target_logger = "rasa.nlu.classifiers.keyword__intent_classifer"

        # Same text, two different intents.
        samples = [("good", "affirm"), ("good", "goodbye")]
        payload = {
            "rasa_nlu_data": {
                "common_examples": [
                    {"text": text, "intent": intent, "entities": []}
                    for text, intent in samples
                ]
            }
        }
        parsed = RasaReader().read_from_json(payload)

        with caplog.at_level(logging.DEBUG, logger=target_logger):
            self._train_classifier(
                classifier_class, parsed, component_config, **kwargs
            )
        assert len(caplog.records) == 1
Exemple #4
0
def test_dump_trainable_entities(entity_extractor: Optional[Text],
                                 expected_output: Text):
    """Check the Markdown dump of an example with one annotated entity.

    A single training example ("test", intent "greet") carries one entity
    spanning characters 0-4. When ``entity_extractor`` is not ``None`` it is
    stored under the entity's ``"extractor"`` key. The data is read with
    ``RasaReader``, dumped with ``MarkdownWriter``, and the second line of
    the dump must equal ``expected_output``.
    """
    entity = {"start": 0, "end": 4, "value": "random", "entity": "word"}
    if entity_extractor is not None:
        entity["extractor"] = entity_extractor

    example = {"text": "test", "intent": "greet", "entities": [entity]}
    training_data_json = {"rasa_nlu_data": {"common_examples": [example]}}

    parsed = RasaReader().read_from_json(training_data_json)
    rendered_lines = MarkdownWriter().dumps(parsed).splitlines()

    # Index 1 is the second line of the dump, the one compared by this test.
    assert rendered_lines[1] == expected_output
    def test_valid_data(
        self, caplog, classifier_class, training_data, component_config, **kwargs
    ):
        """Train on distinct example texts and expect no warnings at all.

        Five examples with distinct texts are split over the intents "affirm"
        and "goodbye". Training must not emit any warning.

        ``caplog`` and ``training_data`` are accepted as fixtures but are not
        used by this test.
        """
        # Function-scope import so this method does not depend on the
        # module's import block.
        import warnings

        json_data = {
            "rasa_nlu_data": {
                "common_examples": [
                    {"text": "good", "intent": "affirm", "entities": []},
                    {"text": "bye", "intent": "goodbye", "entities": []},
                    {"text": "see ya", "intent": "goodbye", "entities": []},
                    {"text": "yes", "intent": "affirm", "entities": []},
                    {"text": "ciao", "intent": "goodbye", "entities": []},
                ]
            }
        }
        rasa_reader = RasaReader()
        data = rasa_reader.read_from_json(json_data)

        # ``pytest.warns(None)`` was deprecated in pytest 7 and is rejected by
        # pytest 8. Recording through the stdlib keeps the same semantics:
        # every warning raised during training is captured in ``record``.
        with warnings.catch_warnings(record=True) as record:
            # "always" stops already-seen warnings from being filtered out.
            warnings.simplefilter("always")
            self._train_classifier(classifier_class, data, component_config, **kwargs)
        assert not record, [str(w.message) for w in record]