예제 #1
0
    def test_should_get_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - yala yili

---
type: intent
name: intent2
utterances:
  - yala yili yulu

---
type: intent
name: intent3
utterances:
  - yili yulu yele""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier_config = LogRegIntentClassifierConfig(random_seed=42)
        parser_config = ProbabilisticIntentParserConfig(classifier_config)
        parser = ProbabilisticIntentParser(parser_config).fit(dataset)
        text = "yala yili yulu"

        # When
        results = parser.get_intents(text)
        intents = [res[RES_INTENT_NAME] for res in results]

        # Then
        expected_intents = ["intent2", "intent1", "intent3", None]

        self.assertEqual(expected_intents, intents)
    def test_should_not_retrain_intent_classifier_when_no_force_retrain(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = ProbabilisticIntentParser()
        intent_classifier = LogRegIntentClassifier()
        intent_classifier.fit(dataset)
        parser.intent_classifier = intent_classifier

        # When / Then
        with patch("snips_nlu.intent_classifier.log_reg_classifier"
                   ".LogRegIntentClassifier.fit") as mock_fit:
            parser.fit(dataset, force_retrain=False)
            mock_fit.assert_not_called()
예제 #3
0
    def test_fitting_should_be_reproducible_after_serialization(self):
        # Checks that two parsers rebuilt from the same serialized dict and
        # fitted on the same dataset end up with equal CRF state features
        # for the "MakeTea" slot filler.

        # Given
        validated_dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        classifier_config = LogRegIntentClassifierConfig(random_seed=666)
        slot_filler_config = CRFSlotFillerConfig(random_seed=42)
        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=classifier_config,
            slot_filler_config=slot_filler_config)
        serialized_parser = ProbabilisticIntentParser(config).to_dict()

        # When
        fitted_parsers = [
            ProbabilisticIntentParser.from_dict(serialized_parser).fit(
                validated_dataset)
            for _ in range(2)
        ]

        # Then
        first_weights, second_weights = [
            fitted.slot_fillers["MakeTea"].crf_model.state_features_
            for fitted in fitted_parsers
        ]
        self.assertEqual(first_weights, second_weights)
    def test_should_get_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - yala yili

---
type: intent
name: intent2
utterances:
  - yala yili yulu

---
type: intent
name: intent3
utterances:
  - yili yulu yele""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        shared[RANDOM_STATE] = 42
        parser = ProbabilisticIntentParser(**shared).fit(dataset)
        text = "yala yili yulu"

        # When
        results = parser.get_intents(text)
        intents = [res[RES_INTENT_NAME] for res in results]

        # Then
        expected_intents = ["intent2", "intent1", "intent3", None]

        self.assertEqual(expected_intents, intents)
    def test_should_parse_with_filter(self):
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[slot1:entity1](foo) bar"

---
type: intent
name: intent2
utterances:
  - foo bar [slot2:entity2](baz)

---
type: intent
name: intent3
utterances:
  - foz for [slot3:entity3](baz)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        shared[RANDOM_STATE] = 42
        parser = ProbabilisticIntentParser(**shared)
        parser.fit(dataset)
        text = "foo bar baz"

        # When
        result = parser.parse(text, intents=["intent1", "intent3"])

        # Then
        expected_slots = [unresolved_slot((0, 3), "foo", "entity1", "slot1")]

        self.assertEqual("intent1", result[RES_INTENT][RES_INTENT_NAME])
        self.assertEqual(expected_slots, result[RES_SLOTS])
    def test_get_slots_should_raise_with_unknown_intent(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
  - Hello [name1](John)

---
type: intent
name: goodbye
utterances:
  - Goodbye [name](Eric)""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            pass

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            pass

        # pylint:enable=unused-variable
        config = ProbabilisticIntentParserConfig(
            intent_classifier_config="my_intent_classifier",
            slot_filler_config="my_slot_filler")
        parser = ProbabilisticIntentParser(config).fit(dataset)

        # When / Then
        with self.assertRaises(IntentNotFoundError):
            parser.get_slots("Hello John", "greeting3")
    def test_should_not_parse_when_not_fitted(self):
        # Checks that a freshly built parser reports fitted as falsy and
        # that calling parse on it raises NotTrained.

        # Given
        unfitted_parser = ProbabilisticIntentParser()

        # When / Then
        self.assertFalse(unfitted_parser.fitted)
        self.assertRaises(NotTrained, unfitted_parser.parse, "foobar")
예제 #8
0
    def test_should_not_retrain_slot_filler_when_no_force_retrain(self):
        # Checks that, with force_retrain=False and a slot filler already
        # set for "MakeCoffee", fitting the parser calls CRFSlotFiller.fit
        # exactly once.

        # Given
        parser = ProbabilisticIntentParser()
        coffee_slot_filler = CRFSlotFiller()
        coffee_slot_filler.fit(BEVERAGE_DATASET, "MakeCoffee")
        parser.slot_fillers["MakeCoffee"] = coffee_slot_filler

        # When / Then
        fit_target = "snips_nlu.slot_filler.crf_slot_filler.CRFSlotFiller.fit"
        with patch(fit_target) as mock_fit:
            parser.fit(BEVERAGE_DATASET, force_retrain=False)
            fit_calls = mock_fit.call_count
            self.assertEqual(1, fit_calls)
예제 #9
0
    def test_should_not_retrain_intent_classifier_when_no_force_retrain(self):
        # Checks that fitting the parser with force_retrain=False does not
        # call LogRegIntentClassifier.fit when a fitted classifier is set.

        # Given
        parser = ProbabilisticIntentParser()
        fitted_classifier = LogRegIntentClassifier()
        fitted_classifier.fit(BEVERAGE_DATASET)
        parser.intent_classifier = fitted_classifier

        # When / Then
        fit_target = ("snips_nlu.intent_classifier.log_reg_classifier"
                      ".LogRegIntentClassifier.fit")
        with patch(fit_target) as mock_fit:
            parser.fit(BEVERAGE_DATASET, force_retrain=False)
            mock_fit.assert_not_called()
예제 #10
0
    def test_should_be_serializable_into_bytearray(self):
        # Checks that a parser restored through to_byte_array /
        # from_byte_array still parses a tea request as "MakeTea".

        # Given
        fitted_parser = ProbabilisticIntentParser().fit(BEVERAGE_DATASET)

        # When
        restored_parser = ProbabilisticIntentParser.from_byte_array(
            fitted_parser.to_byte_array())
        parsing = restored_parser.parse("make me two cups of tea")

        # Then
        self.assertEqual("MakeTea", parsing[RES_INTENT][RES_INTENT_NAME])
    def test_should_get_no_slots_with_none_intent(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: greeting
utterances:
  - Hello [name](John)""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
        parser = ProbabilisticIntentParser().fit(dataset)

        # When
        slots = parser.get_slots("Hello John", None)

        # Then
        self.assertListEqual([], slots)
    def test_should_be_serializable_into_bytearray(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            def get_intent(self, text, intents_filter):
                if "tea" in text:
                    return intent_classification_result("MakeTea", 1.0)
                elif "coffee" in text:
                    return intent_classification_result("MakeCoffee", 1.0)
                return intent_classification_result(None, 1.0)

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            pass

        # pylint:enable=unused-variable

        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config="my_intent_classifier",
            slot_filler_config="my_slot_filler")
        parser = ProbabilisticIntentParser(parser_config).fit(dataset)

        # When
        intent_parser_bytes = parser.to_byte_array()
        loaded_intent_parser = ProbabilisticIntentParser.from_byte_array(
            intent_parser_bytes)
        result = loaded_intent_parser.parse("make me two cups of tea")

        # Then
        self.assertEqual("MakeTea", result[RES_INTENT][RES_INTENT_NAME])
    def test_should_not_retrain_slot_filler_when_no_force_retrain(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            pass

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            fit_call_count = 0

            def fit(self, dataset, intent):
                MySlotFiller.fit_call_count += 1
                return super(MySlotFiller, self).fit(dataset, intent)

        # pylint:enable=unused-variable

        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config="my_intent_classifier",
            slot_filler_config="my_slot_filler")
        parser = ProbabilisticIntentParser(parser_config)
        slot_filler = MySlotFiller(None)
        slot_filler.fit(dataset, "MakeCoffee")
        parser.slot_fillers["MakeCoffee"] = slot_filler

        # When / Then
        parser.fit(dataset, force_retrain=False)
        self.assertEqual(2, MySlotFiller.fit_call_count)
예제 #14
0
    def test_should_be_serializable_before_fitting(self):
        # Checks the metadata.json and intent_parser.json files written by
        # persist for a parser that has not been fitted.

        # Given
        unfitted_parser = ProbabilisticIntentParser()

        # When
        unfitted_parser.persist(self.tmp_file_path)

        # Then
        expected_config = {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": CRFSlotFillerConfig().to_dict(),
            "intent_classifier_config":
                LogRegIntentClassifierConfig().to_dict(),
        }
        expected_parser_dict = {
            "config": expected_config,
            "slot_fillers": [],
        }
        expected_metadata = {"unit_name": "probabilistic_intent_parser"}

        metadata_path = self.tmp_file_path / "metadata.json"
        self.assertJsonContent(metadata_path, expected_metadata)
        parser_path = self.tmp_file_path / "intent_parser.json"
        self.assertJsonContent(parser_path, expected_parser_dict)
예제 #15
0
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[entity1](foo) bar"

---
type: intent
name: intent2
utterances:
  - foo bar [entity2](baz)

---
type: intent
name: intent3
utterances:
  - foz for [entity3](baz)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier_config = LogRegIntentClassifierConfig(random_seed=42)
        slot_filler_config = CRFSlotFillerConfig(random_seed=42)
        parser_config = ProbabilisticIntentParserConfig(
            classifier_config, slot_filler_config)
        parser = ProbabilisticIntentParser(parser_config)
        parser.fit(dataset)
        text = "foo bar baz"

        # When
        results = parser.parse(text, top_n=2)
        intents = [res[RES_INTENT][RES_INTENT_NAME] for res in results]
        entities = [[s[RES_VALUE] for s in res[RES_SLOTS]] for res in results]

        # Then
        expected_intents = ["intent2", "intent1"]
        expected_entities = [["baz"], ["foo"]]

        self.assertListEqual(expected_intents, intents)
        self.assertListEqual(expected_entities, entities)
예제 #16
0
    def test_should_be_serializable_before_fitting(self):
        # Checks the dict produced by to_dict for a parser that has not
        # been fitted: default unit configs, no classifier, no slot fillers.

        # Given
        unfitted_parser = ProbabilisticIntentParser()

        # When
        serialized = unfitted_parser.to_dict()

        # Then
        expected_config = {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": CRFSlotFillerConfig().to_dict(),
            "intent_classifier_config":
                LogRegIntentClassifierConfig().to_dict(),
        }
        expected_serialized = {
            "unit_name": "probabilistic_intent_parser",
            "config": expected_config,
            "intent_classifier": None,
            "slot_fillers": {},
        }
        self.assertDictEqual(serialized, expected_serialized)
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[entity1](foo) bar"

---
type: intent
name: intent2
utterances:
  - foo bar [entity2](baz)

---
type: intent
name: intent3
utterances:
  - foz for [entity3](baz)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        shared[RANDOM_STATE] = 42
        parser = ProbabilisticIntentParser(**shared)
        parser.fit(dataset)
        text = "foo bar baz"

        # When
        results = parser.parse(text, top_n=2)
        intents = [res[RES_INTENT][RES_INTENT_NAME] for res in results]
        entities = [[s[RES_VALUE] for s in res[RES_SLOTS]] for res in results]

        # Then
        expected_intents = ["intent2", "intent1"]
        expected_entities = [["baz"], ["foo"]]

        self.assertListEqual(expected_intents, intents)
        self.assertListEqual(expected_entities, entities)
    def test_should_get_slots(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
  - Hello [name1](John)

---
type: intent
name: greeting2
utterances:
  - Hello [name2](John)

---
type: intent
name: greeting3
utterances:
  - Hello John""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
        parser = ProbabilisticIntentParser().fit(dataset)

        # When
        slots_greeting1 = parser.get_slots("Hello John", "greeting1")
        slots_greeting2 = parser.get_slots("Hello John", "greeting2")
        slots_goodbye = parser.get_slots("Hello John", "greeting3")

        # Then
        self.assertEqual(1, len(slots_greeting1))
        self.assertEqual(1, len(slots_greeting2))
        self.assertEqual(0, len(slots_goodbye))

        self.assertEqual("John", slots_greeting1[0][RES_VALUE])
        self.assertEqual("name1", slots_greeting1[0][RES_ENTITY])
        self.assertEqual("John", slots_greeting2[0][RES_VALUE])
        self.assertEqual("name2", slots_greeting2[0][RES_ENTITY])
예제 #19
0
    def test_should_parse_with_filter(self):
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[slot1:entity1](foo) bar"

---
type: intent
name: intent2
utterances:
  - foo bar [slot2:entity2](baz)

---
type: intent
name: intent3
utterances:
  - foz for [slot3:entity3](baz)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier_config = LogRegIntentClassifierConfig(random_seed=42)
        slot_filler_config = CRFSlotFillerConfig(random_seed=42)
        parser_config = ProbabilisticIntentParserConfig(
            classifier_config, slot_filler_config)
        parser = ProbabilisticIntentParser(parser_config)
        parser.fit(dataset)
        text = "foo bar baz"

        # When
        result = parser.parse(text, intents=["intent1", "intent3"])

        # Then
        expected_slots = [unresolved_slot((0, 3), "foo", "entity1", "slot1")]

        self.assertEqual("intent1", result[RES_INTENT][RES_INTENT_NAME])
        self.assertEqual(expected_slots, result[RES_SLOTS])
예제 #20
0
    def test_should_be_deserializable(self):
        # Writes a persisted-parser directory layout by hand and checks that
        # from_path loads the config, an intent classifier and one slot
        # filler per intent from it.

        # Given
        register_processing_unit(TestIntentClassifier)
        register_processing_unit(TestSlotFiller)

        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=TestIntentClassifierConfig(),
            slot_filler_config=TestSlotFillerConfig()
        )
        slot_filler_entries = [
            {"intent": "MakeCoffee",
             "slot_filler_name": "slot_filler_MakeCoffee"},
            {"intent": "MakeTea",
             "slot_filler_name": "slot_filler_MakeTea"},
        ]
        parser_dict = {
            "unit_name": "probabilistic_intent_parser",
            "slot_fillers": slot_filler_entries,
            "config": config.to_dict(),
        }
        # (sub-directory, unit name stored in its metadata.json)
        unit_dirs = [
            ("intent_classifier", "test_intent_classifier"),
            ("slot_filler_MakeCoffee", "test_slot_filler"),
            ("slot_filler_MakeTea", "test_slot_filler"),
        ]
        root = self.tmp_file_path
        root.mkdir()
        for dir_name, _ in unit_dirs:
            (root / dir_name).mkdir()
        self.writeJsonContent(root / "intent_parser.json", parser_dict)
        for dir_name, unit_name in unit_dirs:
            self.writeJsonContent(
                root / dir_name / "metadata.json", {"unit_name": unit_name})

        # When
        parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

        # Then
        self.assertDictEqual(parser.config.to_dict(), config.to_dict())
        self.assertIsNotNone(parser.intent_classifier)
        loaded_intents = sorted(parser.slot_fillers)
        self.assertListEqual(loaded_intents, ["MakeCoffee", "MakeTea"])
예제 #21
0
    def test_should_be_deserializable_before_fitting(self):
        # Checks that from_dict rebuilds an unfitted parser: same config,
        # no intent classifier and an empty slot filler mapping.

        # Given
        default_config = ProbabilisticIntentParserConfig().to_dict()
        serialized_parser = {
            "unit_name": "probabilistic_intent_parser",
            "config": default_config,
            "intent_classifier": None,
            "slot_fillers": {},
        }

        # When
        parser = ProbabilisticIntentParser.from_dict(serialized_parser)

        # Then
        self.assertEqual(parser.config.to_dict(), default_config)
        self.assertIsNone(parser.intent_classifier)
        self.assertDictEqual({}, parser.slot_fillers)
예제 #22
0
    def test_should_be_serializable(self):
        # Fits a parser built on the registered test units, persists it and
        # checks the JSON files written for the parser, its intent
        # classifier and its two slot fillers.

        # Given
        register_processing_unit(TestIntentClassifier)
        register_processing_unit(TestSlotFiller)

        test_units_config = ProbabilisticIntentParserConfig(
            intent_classifier_config=TestIntentClassifierConfig(),
            slot_filler_config=TestSlotFillerConfig())
        parser = ProbabilisticIntentParser(test_units_config)
        parser.fit(validate_and_format_dataset(BEVERAGE_DATASET))

        # When
        parser.persist(self.tmp_file_path)

        # Then
        expected_parser_dict = {
            "unit_name": "probabilistic_intent_parser",
            "config": {
                "unit_name": "probabilistic_intent_parser",
                "slot_filler_config": {"unit_name": "test_slot_filler"},
                "intent_classifier_config": {
                    "unit_name": "test_intent_classifier"
                },
            },
            "slot_fillers": [
                {"intent": "MakeCoffee", "slot_filler_name": "slot_filler_0"},
                {"intent": "MakeTea", "slot_filler_name": "slot_filler_1"},
            ],
        }
        parser_metadata = {"unit_name": "probabilistic_intent_parser"}
        slot_filler_metadata = {"unit_name": "test_slot_filler"}
        classifier_metadata = {"unit_name": "test_intent_classifier"}

        root = self.tmp_file_path
        self.assertJsonContent(root / "metadata.json", parser_metadata)
        self.assertJsonContent(
            root / "intent_parser.json", expected_parser_dict)
        self.assertJsonContent(
            root / "intent_classifier" / "metadata.json",
            classifier_metadata)
        for slot_filler_dir in ("slot_filler_0", "slot_filler_1"):
            self.assertJsonContent(
                root / slot_filler_dir / "metadata.json",
                slot_filler_metadata)
예제 #23
0
    def test_should_be_deserializable_before_fitting(self):
        # Writes the files of an unfitted parser by hand and checks that
        # from_path loads them: same config, no intent classifier and an
        # empty slot filler mapping.

        # Given
        default_config = ProbabilisticIntentParserConfig().to_dict()
        serialized_parser = {
            "unit_name": "probabilistic_intent_parser",
            "config": default_config,
            "intent_classifier": None,
            "slot_fillers": {},
        }
        root = self.tmp_file_path
        root.mkdir()
        parser_metadata = {"unit_name": "probabilistic_intent_parser"}
        self.writeJsonContent(root / "metadata.json", parser_metadata)
        self.writeJsonContent(root / "intent_parser.json", serialized_parser)

        # When
        parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

        # Then
        self.assertEqual(parser.config.to_dict(), default_config)
        self.assertIsNone(parser.intent_classifier)
        self.assertDictEqual({}, parser.slot_fillers)
예제 #24
0
    def test_fitting_should_be_reproducible_after_serialization(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        seed1 = 666
        seed2 = 42
        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=LogRegIntentClassifierConfig(
                random_seed=seed1),
            slot_filler_config=CRFSlotFillerConfig(random_seed=seed2))
        shared = self.get_shared_data(dataset)
        parser = ProbabilisticIntentParser(config, **shared)
        parser.persist(self.tmp_file_path)

        # When
        fitted_parser_1 = ProbabilisticIntentParser.from_path(
            self.tmp_file_path, **shared).fit(dataset)

        fitted_parser_2 = ProbabilisticIntentParser.from_path(
            self.tmp_file_path, **shared).fit(dataset)

        # Then
        feature_weights_1 = fitted_parser_1.slot_fillers[
            "MakeTea"].crf_model.state_features_
        feature_weights_2 = fitted_parser_2.slot_fillers[
            "MakeTea"].crf_model.state_features_
        self.assertEqual(feature_weights_1, feature_weights_2)
    def test_should_be_deserializable(self):
        # Writes a persisted-parser directory layout by hand and checks that
        # from_path loads the config and instantiates the registered mock
        # classifier and slot fillers.

        # Given
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            pass

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            pass

        parser_config = {
            "unit_name": "probabilistic_intent_parser",
            "intent_classifier_config": {"unit_name": "my_intent_classifier"},
            "slot_filler_config": {"unit_name": "my_slot_filler"},
        }
        slot_filler_entries = [
            {"intent": "MakeCoffee",
             "slot_filler_name": "slot_filler_MakeCoffee"},
            {"intent": "MakeTea",
             "slot_filler_name": "slot_filler_MakeTea"},
        ]
        parser_dict = {
            "unit_name": "probabilistic_intent_parser",
            "slot_fillers": slot_filler_entries,
            "config": parser_config,
        }
        # (sub-directory, unit name stored in its metadata.json)
        unit_dirs = [
            ("intent_classifier", "my_intent_classifier"),
            ("slot_filler_MakeCoffee", "my_slot_filler"),
            ("slot_filler_MakeTea", "my_slot_filler"),
        ]
        root = self.tmp_file_path
        root.mkdir()
        for dir_name, _ in unit_dirs:
            (root / dir_name).mkdir()
        self.writeJsonContent(root / "intent_parser.json", parser_dict)
        for dir_name, unit_name in unit_dirs:
            unit_metadata = {"unit_name": unit_name, "fitted": True}
            self.writeJsonContent(
                root / dir_name / "metadata.json", unit_metadata)

        # When
        parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

        # Then
        self.assertDictEqual(parser.config.to_dict(), parser_config)
        self.assertIsInstance(parser.intent_classifier, MyIntentClassifier)
        loaded_intents = sorted(parser.slot_fillers)
        self.assertListEqual(loaded_intents, ["MakeCoffee", "MakeTea"])
        for loaded_slot_filler in itervalues(parser.slot_fillers):
            self.assertIsInstance(loaded_slot_filler, MySlotFiller)
    def test_should_be_serializable(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            pass

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            pass

        # pylint:enable=unused-variable

        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config="my_intent_classifier",
            slot_filler_config="my_slot_filler")
        parser = ProbabilisticIntentParser(parser_config).fit(dataset)

        # When
        parser.persist(self.tmp_file_path)

        # Then
        expected_parser_config = {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": {
                "unit_name": "my_slot_filler"
            },
            "intent_classifier_config": {
                "unit_name": "my_intent_classifier"
            }
        }
        expected_parser_dict = {
            "config":
            expected_parser_config,
            "slot_fillers": [{
                "intent": "MakeCoffee",
                "slot_filler_name": "slot_filler_0"
            }, {
                "intent": "MakeTea",
                "slot_filler_name": "slot_filler_1"
            }]
        }
        metadata = {
            "unit_name": "probabilistic_intent_parser",
        }
        metadata_slot_filler = {"unit_name": "my_slot_filler", "fitted": True}
        metadata_intent_classifier = {
            "unit_name": "my_intent_classifier",
            "fitted": True
        }

        self.assertJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.assertJsonContent(self.tmp_file_path / "intent_parser.json",
                               expected_parser_dict)
        self.assertJsonContent(
            self.tmp_file_path / "intent_classifier" / "metadata.json",
            metadata_intent_classifier)
        self.assertJsonContent(
            self.tmp_file_path / "slot_filler_0" / "metadata.json",
            metadata_slot_filler)
        self.assertJsonContent(
            self.tmp_file_path / "slot_filler_1" / "metadata.json",
            metadata_slot_filler)
예제 #27
0
    def test_should_be_deserializable(self):
        # Defines and registers minimal test units, then checks that
        # from_dict rebuilds a parser with the expected config, an intent
        # classifier and one slot filler per intent.

        # Given
        class TestIntentClassifierConfig(ProcessingUnitConfig):
            unit_name = "test_intent_classifier"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentClassifierConfig()

        class TestIntentClassifier(IntentClassifier):
            unit_name = "test_intent_classifier"
            config_type = TestIntentClassifierConfig

            def get_intent(self, text, intents_filter):
                return None

            def fit(self, dataset):
                return self

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, unit_dict):
                # The serialized dict is ignored; a fresh config is used
                return TestIntentClassifier(cls.config_type())

        class TestSlotFillerConfig(ProcessingUnitConfig):
            unit_name = "test_slot_filler"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestSlotFillerConfig()

        class TestSlotFiller(SlotFiller):
            unit_name = "test_slot_filler"
            config_type = TestSlotFillerConfig

            def get_slots(self, text):
                return []

            def fit(self, dataset, intent):
                return self

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, unit_dict):
                # The serialized dict is ignored; a fresh config is used
                return TestSlotFiller(cls.config_type())

        register_processing_unit(TestIntentClassifier)
        register_processing_unit(TestSlotFiller)

        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=TestIntentClassifierConfig(),
            slot_filler_config=TestSlotFillerConfig())
        serialized_parser = {
            "unit_name": "probabilistic_intent_parser",
            "intent_classifier": {"unit_name": "test_intent_classifier"},
            "slot_fillers": {
                "MakeCoffee": {"unit_name": "test_slot_filler"},
                "MakeTea": {"unit_name": "test_slot_filler"},
            },
            "config": config.to_dict(),
        }

        # When
        parser = ProbabilisticIntentParser.from_dict(serialized_parser)

        # Then
        self.assertDictEqual(parser.config.to_dict(), config.to_dict())
        self.assertIsNotNone(parser.intent_classifier)
        loaded_intents = sorted(parser.slot_fillers)
        self.assertListEqual(loaded_intents, ["MakeCoffee", "MakeTea"])
예제 #28
0
    def test_should_be_serializable(self):
        # Defines and registers minimal test units, fits a parser built on
        # them and checks the dict produced by to_dict.

        # Given
        class TestIntentClassifierConfig(ProcessingUnitConfig):
            unit_name = "test_intent_classifier"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentClassifierConfig()

        class TestIntentClassifier(IntentClassifier):
            unit_name = "test_intent_classifier"
            config_type = TestIntentClassifierConfig

            def get_intent(self, text, intents_filter):
                return None

            def fit(self, dataset):
                return self

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, unit_dict):
                # The serialized dict is ignored; a fresh config is used
                return TestIntentClassifier(cls.config_type())

        class TestSlotFillerConfig(ProcessingUnitConfig):
            unit_name = "test_slot_filler"

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, obj_dict):
                return TestSlotFillerConfig()

        class TestSlotFiller(SlotFiller):
            unit_name = "test_slot_filler"
            config_type = TestSlotFillerConfig

            def get_slots(self, text):
                return []

            def fit(self, dataset, intent):
                return self

            def to_dict(self):
                return {"unit_name": self.unit_name}

            @classmethod
            def from_dict(cls, unit_dict):
                # The serialized dict is ignored; a fresh config is used
                return TestSlotFiller(cls.config_type())

        register_processing_unit(TestIntentClassifier)
        register_processing_unit(TestSlotFiller)

        test_units_config = ProbabilisticIntentParserConfig(
            intent_classifier_config=TestIntentClassifierConfig(),
            slot_filler_config=TestSlotFillerConfig())
        parser = ProbabilisticIntentParser(test_units_config)
        parser.fit(validate_and_format_dataset(BEVERAGE_DATASET))

        # When
        serialized = parser.to_dict()

        # Then
        expected_serialized = {
            "unit_name": "probabilistic_intent_parser",
            "config": {
                "unit_name": "probabilistic_intent_parser",
                "slot_filler_config": {"unit_name": "test_slot_filler"},
                "intent_classifier_config": {
                    "unit_name": "test_intent_classifier"
                },
            },
            "intent_classifier": {"unit_name": "test_intent_classifier"},
            "slot_fillers": {
                "MakeCoffee": {"unit_name": "test_slot_filler"},
                "MakeTea": {"unit_name": "test_slot_filler"},
            },
        }
        self.assertDictEqual(serialized, expected_serialized)