コード例 #1
0
    def test_should_be_serializable_into_bytearray(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a cup of tea
- i want two cups of tea please
- can you prepare one cup of tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me a cup of coffee please
- brew two cups of coffee
- can you prepare one cup of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        intent_classifier = LogRegIntentClassifier(**shared).fit(dataset)

        # When
        intent_classifier_bytes = intent_classifier.to_byte_array()
        loaded_classifier = LogRegIntentClassifier.from_byte_array(
            intent_classifier_bytes, **shared)
        result = loaded_classifier.get_intent("make me two cups of tea")

        # Then
        expected_intent = "MakeTea"
        self.assertEqual(expected_intent, result[RES_INTENT_NAME])
コード例 #2
0
    def test_should_get_intent_when_filter(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a cup of tea
- i want two cups of tea please
- can you prepare one cup of tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me a cup of coffee please
- brew two cups of coffee
- can you prepare one cup of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier = LogRegIntentClassifier(random_state=42).fit(dataset)

        # When
        text1 = "Make me two cups of tea"
        res1 = classifier.get_intent(text1, ["MakeCoffee", "MakeTea"])

        text2 = "Make me two cups of tea"
        res2 = classifier.get_intent(text2, ["MakeCoffee"])

        text3 = "bla bla bla"
        res3 = classifier.get_intent(text3, ["MakeCoffee"])

        # Then
        self.assertEqual("MakeTea", res1[RES_INTENT_NAME])
        self.assertEqual("MakeCoffee", res2[RES_INTENT_NAME])
        self.assertEqual(None, res3[RES_INTENT_NAME])
コード例 #3
0
    def test_should_get_intent(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: my_first_intent
utterances:
- how are you
- hello how are you?
- what's up

---
type: intent
name: my_second_intent
utterances:
- what is the weather today ?
- does it rain
- will it rain tomorrow""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier = LogRegIntentClassifier(random_state=42).fit(dataset)
        text = "hey how are you doing ?"

        # When
        res = classifier.get_intent(text)
        intent = res[RES_INTENT_NAME]

        # Then
        self.assertEqual("my_first_intent", intent)
コード例 #4
0
    def test_should_get_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - yala yili

---
type: intent
name: intent2
utterances:
  - yala yili yulu

---
type: intent
name: intent3
utterances:
  - yili yulu yele""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier = LogRegIntentClassifier(random_state=42).fit(dataset)
        text = "yala yili yulu"

        # When
        results = classifier.get_intents(text)
        intents = [res[RES_INTENT_NAME] for res in results]

        # Then
        expected_intents = ["intent2", "intent1", "intent3", None]

        self.assertEqual(expected_intents, intents)
コード例 #5
0
    def test_should_be_serializable(self, mock_to_dict):
        # Given
        mocked_dict = {"mocked_featurizer_key": "mocked_featurizer_value"}

        mock_to_dict.return_value = mocked_dict

        dataset = validate_and_format_dataset(SAMPLE_DATASET)

        intent_classifier = LogRegIntentClassifier().fit(dataset)
        coeffs = intent_classifier.classifier.coef_.tolist()
        intercept = intent_classifier.classifier.intercept_.tolist()

        # When
        classifier_dict = intent_classifier.to_dict()

        # Then
        intent_list = sorted(SAMPLE_DATASET[INTENTS])
        intent_list.append(None)
        expected_dict = {
            "unit_name": "log_reg_intent_classifier",
            "config": LogRegIntentClassifierConfig().to_dict(),
            "coeffs": coeffs,
            "intercept": intercept,
            "t_": 701.0,
            "intent_list": intent_list,
            "featurizer": mocked_dict
        }
        self.assertEqual(expected_dict, classifier_dict)
コード例 #6
0
    def test_should_get_none_intent_when_empty_input(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: my_first_intent
utterances:
- how are you
- hello how are you?
- what's up

---
type: intent
name: my_second_intent
utterances:
- what is the weather today ?
- does it rain
- will it rain tomorrow""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier = LogRegIntentClassifier().fit(dataset)
        text = ""

        # When
        result = classifier.get_intent(text)

        # Then
        self.assertEqual(intent_classification_result(None, 1.0), result)
    def test_should_not_retrain_intent_classifier_when_no_force_retrain(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = ProbabilisticIntentParser()
        intent_classifier = LogRegIntentClassifier()
        intent_classifier.fit(dataset)
        parser.intent_classifier = intent_classifier

        # When / Then
        with patch("snips_nlu.intent_classifier.log_reg_classifier"
                   ".LogRegIntentClassifier.fit") as mock_fit:
            parser.fit(dataset, force_retrain=False)
            mock_fit.assert_not_called()
コード例 #8
0
    def test_should_be_serializable(self, mock_to_dict):
        # Given
        mocked_dict = {"mocked_featurizer_key": "mocked_featurizer_value"}

        mock_to_dict.return_value = mocked_dict

        dataset = validate_and_format_dataset(SAMPLE_DATASET)

        intent_classifier = LogRegIntentClassifier().fit(dataset)
        coeffs = intent_classifier.classifier.coef_.tolist()
        intercept = intent_classifier.classifier.intercept_.tolist()

        # When
        intent_classifier.persist(self.tmp_file_path)

        # Then
        intent_list = sorted(SAMPLE_DATASET[INTENTS])
        intent_list.append(None)
        expected_dict = {
            "unit_name": "log_reg_intent_classifier",
            "config": LogRegIntentClassifierConfig().to_dict(),
            "coeffs": coeffs,
            "intercept": intercept,
            "t_": 701.0,
            "intent_list": intent_list,
            "featurizer": mocked_dict
        }
        metadata = {"unit_name": "log_reg_intent_classifier"}
        self.assertJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.assertJsonContent(self.tmp_file_path / "intent_classifier.json",
                               expected_dict)
コード例 #9
0
    def test_should_get_intent_after_deserialization(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a cup of tea
- i want two cups of tea please
- can you prepare one cup of tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me a cup of coffee please
- brew two cups of coffee
- can you prepare one cup of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        classifier = LogRegIntentClassifier(**shared).fit(dataset)
        classifier.persist(self.tmp_file_path)

        # When
        loaded_classifier = LogRegIntentClassifier.from_path(
            self.tmp_file_path, **shared)
        result = loaded_classifier.get_intent("Make me two cups of tea")

        # Then
        expected_intent = "MakeTea"
        self.assertEqual(expected_intent, result[RES_INTENT_NAME])
コード例 #10
0
    def test_empty_vocabulary_should_fit_and_return_none_intent(
            self, mocked_build_training):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: dummy_intent_1
utterances:
  - "[dummy_slot_name:dummy_entity_1](...)"
  
---
type: entity
name: dummy_entity_1
automatically_extensible: true
use_synonyms: false
matching_strictness: 1.0
values:
  - ...
""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        text = " "
        noise_size = 6
        utterances = [text] + [text] * noise_size
        utterances = [text_to_utterance(t) for t in utterances]
        labels = [0] + [1] * noise_size
        intent_list = ["dummy_intent_1", None]
        mocked_build_training.return_value = utterances, labels, intent_list

        # When / Then
        intent_classifier = LogRegIntentClassifier().fit(dataset)
        intent = intent_classifier.get_intent("no intent there")
        self.assertEqual(intent_classification_result(None, 1.0), intent)
コード例 #11
0
    def test_should_not_get_intent_when_not_fitted(self):
        # Given
        intent_classifier = LogRegIntentClassifier()

        # When / Then
        self.assertFalse(intent_classifier.fitted)
        with self.assertRaises(NotTrained):
            intent_classifier.get_intent("foobar")
コード例 #12
0
    def test_should_get_intents_when_empty_dataset(self):
        # Given
        dataset = get_empty_dataset(LANGUAGE_EN)
        classifier = LogRegIntentClassifier().fit(dataset)
        text = "this is a dummy query"

        # When
        results = classifier.get_intents(text)

        # Then
        expected_results = [{RES_INTENT_NAME: None, RES_PROBA: 1.0}]
        self.assertEqual(expected_results, results)
コード例 #13
0
    def test_should_get_none_intent_when_empty_dataset(self):
        # Given
        dataset = get_empty_dataset(LANGUAGE_EN)
        classifier = LogRegIntentClassifier().fit(dataset)
        text = "this is a dummy query"

        # When
        intent = classifier.get_intent(text)

        # Then
        expected_intent = intent_classification_result(None, 1.0)
        self.assertEqual(intent, expected_intent)
コード例 #14
0
    def test_should_get_none_if_empty_dataset(self):
        # Given
        dataset = validate_and_format_dataset(get_empty_dataset(LANGUAGE_EN))
        classifier = LogRegIntentClassifier().fit(dataset)
        text = "this is a dummy query"

        # When
        intent = classifier.get_intent(text)

        # Then
        expected_intent = None
        self.assertEqual(intent, expected_intent)
コード例 #15
0
    def test_should_not_retrain_intent_classifier_when_no_force_retrain(self):
        # Given
        parser = ProbabilisticIntentParser()
        intent_classifier = LogRegIntentClassifier()
        intent_classifier.fit(BEVERAGE_DATASET)
        parser.intent_classifier = intent_classifier

        # When / Then
        with patch("snips_nlu.intent_classifier.log_reg_classifier"
                   ".LogRegIntentClassifier.fit") as mock_fit:
            parser.fit(BEVERAGE_DATASET, force_retrain=False)
            mock_fit.assert_not_called()
コード例 #16
0
    def test_should_get_none_if_empty_dataset(self):
        # Given
        dataset = validate_and_format_dataset(get_empty_dataset(LANGUAGE_EN))
        classifier = LogRegIntentClassifier().fit(dataset)
        text = "this is a dummy query"

        # When
        intent = classifier.get_intent(text)

        # Then
        expected_intent = None
        self.assertEqual(intent, expected_intent)
コード例 #17
0
    def test_should_get_intent_after_deserialization(self):
        # Given
        dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        classifier = LogRegIntentClassifier().fit(dataset)
        classifier_dict = classifier.to_dict()

        # When
        loaded_classifier = LogRegIntentClassifier.from_dict(classifier_dict)
        result = loaded_classifier.get_intent("Make me two cups of tea")

        # Then
        expected_intent = "MakeTea"
        self.assertEqual(expected_intent, result[RES_INTENT_NAME])
コード例 #18
0
    def test_should_get_intent_after_deserialization(self):
        # Given
        dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        classifier = LogRegIntentClassifier().fit(dataset)
        classifier_dict = classifier.to_dict()

        # When
        loaded_classifier = LogRegIntentClassifier.from_dict(classifier_dict)
        result = loaded_classifier.get_intent("Make me two cups of tea")

        # Then
        expected_intent = "MakeTea"
        self.assertEqual(expected_intent, result[RES_INTENT_NAME])
コード例 #19
0
    def test_should_be_serializable_into_bytearray(self):
        # Given
        dataset = BEVERAGE_DATASET
        intent_classifier = LogRegIntentClassifier().fit(dataset)

        # When
        intent_classifier_bytes = intent_classifier.to_byte_array()
        loaded_classifier = LogRegIntentClassifier.from_byte_array(
            intent_classifier_bytes)
        result = loaded_classifier.get_intent("make me two cups of tea")

        # Then
        expected_intent = "MakeTea"
        self.assertEqual(expected_intent, result[RES_INTENT_NAME])
コード例 #20
0
    def test_intent_classifier_should_get_intent(self):
        # Given
        dataset = validate_and_format_dataset(SAMPLE_DATASET)
        classifier = LogRegIntentClassifier().fit(dataset)
        text = "This is a dummy_3 query from another intent"

        # When
        res = classifier.get_intent(text)
        intent = res[RES_INTENT_NAME]

        # Then
        expected_intent = "dummy_intent_2"

        self.assertEqual(intent, expected_intent)
コード例 #21
0
    def test_intent_classifier_should_get_intent(self):
        # Given
        dataset = validate_and_format_dataset(SAMPLE_DATASET)
        classifier = LogRegIntentClassifier().fit(dataset)
        text = "This is a dummy_3 query from another intent"

        # When
        res = classifier.get_intent(text)
        intent = res[RES_INTENT_NAME]

        # Then
        expected_intent = "dummy_intent_2"

        self.assertEqual(intent, expected_intent)
コード例 #22
0
    def test_empty_vocabulary_should_fit_and_return_none_intent(
            self, mocked_build_training):
        # Given
        language = LANGUAGE_EN
        dataset = {
            "entities": {
                "dummy_entity_1": {
                    "automatically_extensible": True,
                    "use_synonyms": False,
                    "data": [
                        {
                            "value": "...",
                            "synonyms": [],
                        }
                    ],
                    "matching_strictness": 1.0
                }
            },
            "intents": {
                "dummy_intent_1": {
                    "utterances": [
                        {
                            "data": [
                                {
                                    "text": "...",
                                    "slot_name": "dummy_slot_name",
                                    "entity": "dummy_entity_1"
                                }
                            ]
                        }
                    ]
                }
            },
            "language": language
        }
        dataset = validate_and_format_dataset(dataset)

        text = " "
        noise_size = 6
        utterances = [text] + [text] * noise_size
        utterances = [text_to_utterance(t) for t in utterances]
        labels = [0] + [1] * noise_size
        intent_list = ["dummy_intent_1", None]
        mocked_build_training.return_value = utterances, labels, intent_list

        # When / Then
        intent_classifier = LogRegIntentClassifier().fit(dataset)
        intent = intent_classifier.get_intent("no intent there")
        self.assertEqual(None, intent)
コード例 #23
0
    def test_empty_vocabulary_should_fit_and_return_none_intent(
            self, mocked_build_training):
        # Given
        language = LANGUAGE_EN
        dataset = {
            "snips_nlu_version": "0.0.1",
            "entities": {
                "dummy_entity_1": {
                    "automatically_extensible": True,
                    "use_synonyms": False,
                    "data": [
                        {
                            "value": "...",
                            "synonyms": [],
                        }
                    ]
                }
            },
            "intents": {
                "dummy_intent_1": {
                    "utterances": [
                        {
                            "data": [
                                {
                                    "text": "...",
                                    "slot_name": "dummy_slot_name",
                                    "entity": "dummy_entity_1"
                                }
                            ]
                        }
                    ]
                }
            },
            "language": language
        }
        dataset = validate_and_format_dataset(dataset)

        text = " "
        noise_size = 6
        utterance = [text] + [text] * noise_size
        labels = [1] + [None] * noise_size
        intent_list = ["dummy_intent_1", None]
        mocked_build_training.return_value = utterance, labels, intent_list

        # When / Then
        intent_classifier = LogRegIntentClassifier().fit(dataset)
        intent = intent_classifier.get_intent("no intent there")
        self.assertEqual(intent, None)
コード例 #24
0
    def test_should_be_serializable_into_bytearray(self):
        # Given
        dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        intent_classifier = LogRegIntentClassifier().fit(dataset)

        # When
        intent_classifier_bytes = intent_classifier.to_byte_array()
        custom_entity_parser = CustomEntityParser.build(
            dataset, CustomEntityParserUsage.WITHOUT_STEMS)
        builtin_entity_parser = BuiltinEntityParser.build(language="en")
        loaded_classifier = LogRegIntentClassifier.from_byte_array(
            intent_classifier_bytes,
            builtin_entity_parser=builtin_entity_parser,
            custom_entity_parser=custom_entity_parser)
        result = loaded_classifier.get_intent("make me two cups of tea")

        # Then
        expected_intent = "MakeTea"
        self.assertEqual(expected_intent, result[RES_INTENT_NAME])
コード例 #25
0
    def test_should_get_intent_after_deserialization(self):
        # Given
        dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        classifier = LogRegIntentClassifier().fit(dataset)
        classifier.persist(self.tmp_file_path)

        # When
        builtin_entity_parser = BuiltinEntityParser.build(language="en")
        custom_entity_parser = CustomEntityParser.build(
            dataset, CustomEntityParserUsage.WITHOUT_STEMS)
        loaded_classifier = LogRegIntentClassifier.from_path(
            self.tmp_file_path,
            builtin_entity_parser=builtin_entity_parser,
            custom_entity_parser=custom_entity_parser)
        result = loaded_classifier.get_intent("Make me two cups of tea")

        # Then
        expected_intent = "MakeTea"
        self.assertEqual(expected_intent, result[RES_INTENT_NAME])
コード例 #26
0
    def test_intent_classifier_should_get_intent_when_filter(self):
        # Given
        dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        classifier = LogRegIntentClassifier().fit(dataset)

        # When
        text1 = "Make me two cups of tea"
        res1 = classifier.get_intent(text1, ["MakeCoffee", "MakeTea"])

        text2 = "Make me two cups of tea"
        res2 = classifier.get_intent(text2, ["MakeCoffee"])

        text3 = "bla bla bla"
        res3 = classifier.get_intent(text3, ["MakeCoffee"])

        # Then
        self.assertEqual("MakeTea", res1[RES_INTENT_NAME])
        self.assertEqual("MakeCoffee", res2[RES_INTENT_NAME])
        self.assertEqual(None, res3)
コード例 #27
0
    def test_intent_classifier_should_get_intent_when_filter(self):
        # Given
        dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        classifier = LogRegIntentClassifier().fit(dataset)

        # When
        text1 = "Make me two cups of tea"
        res1 = classifier.get_intent(text1, ["MakeCoffee", "MakeTea"])

        text2 = "Make me two cups of tea"
        res2 = classifier.get_intent(text2, ["MakeCoffee"])

        text3 = "bla bla bla"
        res3 = classifier.get_intent(text3, ["MakeCoffee"])

        # Then
        self.assertEqual("MakeTea", res1[RES_INTENT_NAME])
        self.assertEqual("MakeCoffee", res2[RES_INTENT_NAME])
        self.assertEqual(None, res3)
コード例 #28
0
    def test_should_be_serializable(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo bar

---
type: intent
name: intent2
utterances:
  - lorem ipsum""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        intent_classifier = LogRegIntentClassifier(
            random_state=42).fit(dataset)
        coeffs = intent_classifier.classifier.coef_.tolist()
        intercept = intent_classifier.classifier.intercept_.tolist()
        t_ = intent_classifier.classifier.t_

        # When
        intent_classifier.persist(self.tmp_file_path)

        # Then
        intent_list = ["intent1", "intent2", None]
        expected_dict = {
            "config": LogRegIntentClassifierConfig().to_dict(),
            "coeffs": coeffs,
            "intercept": intercept,
            "t_": t_,
            "intent_list": intent_list,
            "featurizer": "featurizer"
        }
        metadata = {"unit_name": "log_reg_intent_classifier"}
        self.assertJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.assertJsonContent(self.tmp_file_path / "intent_classifier.json",
                               expected_dict)
        featurizer_path = self.tmp_file_path / "featurizer"
        self.assertTrue(featurizer_path.exists())
        self.assertTrue(featurizer_path.is_dir())
コード例 #29
0
    def test_log_activation_weights(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo bar

---
type: intent
name: intent2
utterances:
  - lorem ipsum""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        intent_classifier = LogRegIntentClassifier(**shared)

        text = "yo"
        utterances = [text_to_utterance(text)]
        self.assertIsNone(intent_classifier.log_activation_weights(text, None))

        # When
        intent_classifier.fit(dataset)
        x = intent_classifier.featurizer.transform(utterances)[0]
        log = intent_classifier.log_activation_weights(text, x, top_n=42)

        # Then
        self.assertIsInstance(log, str)
        self.assertIn("Top 42", log)
コード例 #30
0
    def test_log_best_features(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo bar

---
type: intent
name: intent2
utterances:
  - lorem ipsum""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        intent_classifier = LogRegIntentClassifier(**shared)

        # When
        self.assertIsNone(intent_classifier.log_best_features(20))
        intent_classifier.fit(dataset)
        log = intent_classifier.log_best_features(20)

        # Then
        self.assertIsInstance(log, str)
        self.assertIn("Top 20", log)
コード例 #31
0
    def test_should_get_intents_when_empty_input(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo bar

---
type: intent
name: intent2
utterances:
  - lorem ipsum""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier = LogRegIntentClassifier().fit(dataset)
        text = ""

        # When
        results = classifier.get_intents(text)

        # Then
        expected_results = [
            {
                RES_INTENT_NAME: None,
                RES_PROBA: 1.0
            },
            {
                RES_INTENT_NAME: "intent1",
                RES_PROBA: 0.0
            },
            {
                RES_INTENT_NAME: "intent2",
                RES_PROBA: 0.0
            },
        ]
        self.assertEqual(expected_results, results)
コード例 #32
0
    def test_should_be_deserializable(self, mock_from_dict):
        # Given
        mocked_featurizer = Featurizer(LANGUAGE_EN, None)
        mock_from_dict.return_value = mocked_featurizer

        intent_list = ["MakeCoffee", "MakeTea", None]

        coeffs = [
            [1.23, 4.5],
            [6.7, 8.90],
            [1.01, 2.345],
        ]

        intercept = [
            0.34,
            0.41,
            -0.98
        ]

        t_ = 701.

        config = LogRegIntentClassifierConfig().to_dict()

        classifier_dict = {
            "coeffs": coeffs,
            "intercept": intercept,
            "t_": t_,
            "intent_list": intent_list,
            "config": config,
            "featurizer": mocked_featurizer.to_dict(),
        }
        self.tmp_file_path.mkdir()
        metadata = {"unit_name": "log_reg_intent_classifier"}
        self.writeJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.writeJsonContent(self.tmp_file_path / "intent_classifier.json",
                              classifier_dict)

        # When
        classifier = LogRegIntentClassifier.from_path(self.tmp_file_path)

        # Then
        self.assertEqual(classifier.intent_list, intent_list)
        self.assertIsNotNone(classifier.featurizer)
        self.assertListEqual(classifier.classifier.coef_.tolist(), coeffs)
        self.assertListEqual(classifier.classifier.intercept_.tolist(),
                             intercept)
        self.assertDictEqual(classifier.config.to_dict(), config)
コード例 #33
0
    def test_should_be_deserializable(self, mock_from_dict):
        # Given
        mocked_featurizer = Featurizer(LANGUAGE_EN, None)
        mock_from_dict.return_value = mocked_featurizer

        intent_list = ["MakeCoffee", "MakeTea", None]

        coeffs = [
            [1.23, 4.5],
            [6.7, 8.90],
            [1.01, 2.345],
        ]

        intercept = [
            0.34,
            0.41,
            -0.98
        ]

        t_ = 701.

        config = LogRegIntentClassifierConfig().to_dict()

        classifier_dict = {
            "coeffs": coeffs,
            "intercept": intercept,
            "t_": t_,
            "intent_list": intent_list,
            "config": config,
            "featurizer": mocked_featurizer.to_dict(),
        }

        # When
        classifier = LogRegIntentClassifier.from_dict(classifier_dict)

        # Then
        self.assertEqual(classifier.intent_list, intent_list)
        self.assertIsNotNone(classifier.featurizer)
        self.assertListEqual(classifier.classifier.coef_.tolist(), coeffs)
        self.assertListEqual(classifier.classifier.intercept_.tolist(),
                             intercept)
        self.assertDictEqual(classifier.config.to_dict(), config)
コード例 #34
0
    def test_should_be_deserializable(self, mock_from_dict):
        # Given
        mocked_featurizer = Featurizer(LANGUAGE_EN, None)
        mock_from_dict.return_value = mocked_featurizer

        intent_list = ["MakeCoffee", "MakeTea", None]

        coeffs = [
            [1.23, 4.5],
            [6.7, 8.90],
            [1.01, 2.345],
        ]

        intercept = [0.34, 0.41, -0.98]

        t_ = 701.

        config = LogRegIntentClassifierConfig().to_dict()

        classifier_dict = {
            "coeffs": coeffs,
            "intercept": intercept,
            "t_": t_,
            "intent_list": intent_list,
            "config": config,
            "featurizer": mocked_featurizer.to_dict(),
        }

        # When
        classifier = LogRegIntentClassifier.from_dict(classifier_dict)

        # Then
        self.assertEqual(classifier.intent_list, intent_list)
        self.assertIsNotNone(classifier.featurizer)
        self.assertListEqual(classifier.classifier.coef_.tolist(), coeffs)
        self.assertListEqual(classifier.classifier.intercept_.tolist(),
                             intercept)
        self.assertDictEqual(classifier.config.to_dict(), config)
コード例 #35
0
    def test_training_should_be_reproducible(self):
        # Given
        random_state = 40
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # When
        classifier1 = LogRegIntentClassifier(random_state=random_state)
        classifier1.fit(dataset)

        classifier2 = LogRegIntentClassifier(random_state=random_state)
        classifier2.fit(dataset)

        # Then
        with temp_dir() as tmp_dir:
            dir_classifier1 = tmp_dir / "classifier1"
            dir_classifier2 = tmp_dir / "classifier2"
            classifier1.persist(dir_classifier1)
            classifier2.persist(dir_classifier2)
            hash1 = dirhash(str(dir_classifier1), 'sha256')
            hash2 = dirhash(str(dir_classifier2), 'sha256')
            self.assertEqual(hash1, hash2)