Example #1
0
    def test_fitting_should_be_reproducible_after_serialization(self):
        # An unfitted parser is persisted once, then restored and fitted twice
        # on the same dataset. Both fits must produce identical CRF state
        # features for the "MakeTea" slot filler.

        # Given
        training_dataset = validate_and_format_dataset(BEVERAGE_DATASET)

        classifier_seed = 666
        slot_filler_seed = 42
        classifier_config = LogRegIntentClassifierConfig(
            random_seed=classifier_seed)
        slot_filler_config = CRFSlotFillerConfig(random_seed=slot_filler_seed)
        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config=classifier_config,
            slot_filler_config=slot_filler_config)
        ProbabilisticIntentParser(parser_config).persist(self.tmp_file_path)

        def restore_and_fit():
            # Reload the persisted parser and train it on the shared dataset
            restored = ProbabilisticIntentParser.from_path(self.tmp_file_path)
            return restored.fit(training_dataset)

        # When
        first_parser = restore_and_fit()
        second_parser = restore_and_fit()

        # Then
        first_weights = first_parser.slot_fillers[
            "MakeTea"].crf_model.state_features_
        second_weights = second_parser.slot_fillers[
            "MakeTea"].crf_model.state_features_
        self.assertEqual(first_weights, second_weights)
Example #2
0
    def test_fitting_should_be_reproducible_after_serialization(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        seed1 = 666
        seed2 = 42
        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=LogRegIntentClassifierConfig(
                random_seed=seed1),
            slot_filler_config=CRFSlotFillerConfig(random_seed=seed2))
        shared = self.get_shared_data(dataset)
        parser = ProbabilisticIntentParser(config, **shared)
        parser.persist(self.tmp_file_path)

        # When
        fitted_parser_1 = ProbabilisticIntentParser.from_path(
            self.tmp_file_path, **shared).fit(dataset)

        fitted_parser_2 = ProbabilisticIntentParser.from_path(
            self.tmp_file_path, **shared).fit(dataset)

        # Then
        feature_weights_1 = fitted_parser_1.slot_fillers[
            "MakeTea"].crf_model.state_features_
        feature_weights_2 = fitted_parser_2.slot_fillers[
            "MakeTea"].crf_model.state_features_
        self.assertEqual(feature_weights_1, feature_weights_2)
Example #3
0
    def test_should_be_deserializable(self):
        # Builds the on-disk layout of a persisted parser by hand (one
        # directory per sub-unit, each with a metadata.json) and checks that
        # from_path restores the config, an intent classifier and one slot
        # filler per intent.

        # Given
        register_processing_unit(TestIntentClassifier)
        register_processing_unit(TestSlotFiller)

        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=TestIntentClassifierConfig(),
            slot_filler_config=TestSlotFillerConfig()
        )
        intents = ["MakeCoffee", "MakeTea"]
        slot_filler_entries = [
            {"intent": intent, "slot_filler_name": "slot_filler_" + intent}
            for intent in intents
        ]
        parser_dict = {
            "unit_name": "probabilistic_intent_parser",
            "slot_fillers": slot_filler_entries,
            "config": config.to_dict(),
        }

        # (directory name, unit name stored in that directory's metadata)
        sub_units = [
            ("intent_classifier", "test_intent_classifier"),
            ("slot_filler_MakeCoffee", "test_slot_filler"),
            ("slot_filler_MakeTea", "test_slot_filler"),
        ]
        root = self.tmp_file_path
        root.mkdir()
        for dir_name, _ in sub_units:
            (root / dir_name).mkdir()
        self.writeJsonContent(root / "intent_parser.json", parser_dict)
        for dir_name, unit_name in sub_units:
            self.writeJsonContent(root / dir_name / "metadata.json",
                                  {"unit_name": unit_name})

        # When
        parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

        # Then
        self.assertDictEqual(parser.config.to_dict(), config.to_dict())
        self.assertIsNotNone(parser.intent_classifier)
        self.assertListEqual(sorted(parser.slot_fillers),
                             ["MakeCoffee", "MakeTea"])
Example #4
0
    def test_should_be_deserializable_before_fitting(self):
        # Writes the files of a parser that has no intent classifier and no
        # slot fillers, then checks that from_path restores it with the same
        # config, a None intent classifier and an empty slot filler mapping.

        # Given
        expected_config = ProbabilisticIntentParserConfig().to_dict()
        parser_dict = {
            "unit_name": "probabilistic_intent_parser",
            "config": expected_config,
            "intent_classifier": None,
            "slot_fillers": {},
        }
        root = self.tmp_file_path
        root.mkdir()
        self.writeJsonContent(
            root / "metadata.json",
            {"unit_name": "probabilistic_intent_parser"})
        self.writeJsonContent(root / "intent_parser.json", parser_dict)

        # When
        parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

        # Then
        self.assertEqual(parser.config.to_dict(), expected_config)
        self.assertIsNone(parser.intent_classifier)
        self.assertDictEqual({}, parser.slot_fillers)
    def test_should_be_deserializable(self):
        # Registers mock sub-units, builds the on-disk layout of a persisted
        # parser by hand (one directory per sub-unit, each with a
        # metadata.json) and checks that from_path restores the config and
        # instantiates the registered classes.

        # Given
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            pass

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            pass

        parser_config = {
            "unit_name": "probabilistic_intent_parser",
            "intent_classifier_config": {"unit_name": "my_intent_classifier"},
            "slot_filler_config": {"unit_name": "my_slot_filler"},
        }
        intents = ["MakeCoffee", "MakeTea"]
        slot_filler_entries = [
            {"intent": intent, "slot_filler_name": "slot_filler_" + intent}
            for intent in intents
        ]
        parser_dict = {
            "unit_name": "probabilistic_intent_parser",
            "slot_fillers": slot_filler_entries,
            "config": parser_config,
        }

        # (directory name, unit name stored in that directory's metadata)
        sub_units = [
            ("intent_classifier", "my_intent_classifier"),
            ("slot_filler_MakeCoffee", "my_slot_filler"),
            ("slot_filler_MakeTea", "my_slot_filler"),
        ]
        root = self.tmp_file_path
        root.mkdir()
        for dir_name, _ in sub_units:
            (root / dir_name).mkdir()
        self.writeJsonContent(root / "intent_parser.json", parser_dict)
        for dir_name, unit_name in sub_units:
            self.writeJsonContent(
                root / dir_name / "metadata.json",
                {"unit_name": unit_name, "fitted": True})

        # When
        parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

        # Then
        self.assertDictEqual(parser.config.to_dict(), parser_config)
        self.assertIsInstance(parser.intent_classifier, MyIntentClassifier)
        self.assertListEqual(sorted(parser.slot_fillers),
                             ["MakeCoffee", "MakeTea"])
        for restored_filler in itervalues(parser.slot_fillers):
            self.assertIsInstance(restored_filler, MySlotFiller)