Example #1
    def test_training_should_be_reproducible(self):
        # Given
        random_state = 42
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a hot cup of tea
- make me five tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me one cup of coffee please
- brew two cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # When
        engine1 = SnipsNLUEngine(random_state=random_state)
        engine1.fit(dataset)

        engine2 = SnipsNLUEngine(random_state=random_state)
        engine2.fit(dataset)

        # Then
        with temp_dir() as tmp_dir:
            dir_engine1 = tmp_dir / "engine1"
            dir_engine2 = tmp_dir / "engine2"
            engine1.persist(dir_engine1)
            engine2.persist(dir_engine2)
            hash1 = dirhash(str(dir_engine1), 'sha256')
            hash2 = dirhash(str(dir_engine2), 'sha256')
            self.assertEqual(hash1, hash2)
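
Every example on this page leans on the same temp_dir helper, which yields a pathlib.Path to a scratch directory and cleans it up when the block exits. Its actual implementation is internal to snips-nlu; a minimal sketch of an equivalent context manager (the name comes from the call sites, the body is an assumption) could look like this:

import shutil
import tempfile
from contextlib import contextmanager
from pathlib import Path

@contextmanager
def temp_dir():
    # Hand out a fresh temporary directory as a pathlib.Path
    tmp = Path(tempfile.mkdtemp())
    try:
        yield tmp
    finally:
        # Remove the directory and all persisted artifacts on exit
        shutil.rmtree(str(tmp), ignore_errors=True)
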
Example #2
    def test_training_should_be_reproducible(self):
        # Given
        random_state = 40
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # When
        classifier1 = LogRegIntentClassifier(random_state=random_state)
        classifier1.fit(dataset)

        classifier2 = LogRegIntentClassifier(random_state=random_state)
        classifier2.fit(dataset)

        # Then
        with temp_dir() as tmp_dir:
            dir_classifier1 = tmp_dir / "classifier1"
            dir_classifier2 = tmp_dir / "classifier2"
            classifier1.persist(dir_classifier1)
            classifier2.persist(dir_classifier2)
            hash1 = dirhash(str(dir_classifier1), 'sha256')
            hash2 = dirhash(str(dir_classifier2), 'sha256')
            self.assertEqual(hash1, hash2)
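
Both reproducibility tests reduce a persisted model directory to a single digest via dirhash(path, 'sha256'), which matches the dirhash package's dirhash(directory, algorithm) entry point. For intuition only, a simplified stand-in (a hypothetical naive_dirhash, not the package's real algorithm) could be:

import hashlib
from pathlib import Path

def naive_dirhash(dirpath, algorithm="sha256"):
    # Fold relative paths and file contents into one digest; sorting the
    # walk makes the result deterministic, so identical trees hash equal
    digest = hashlib.new(algorithm)
    root = Path(dirpath)
    for path in sorted(p for p in root.rglob("*") if p.is_file()):
        digest.update(str(path.relative_to(root)).encode("utf-8"))
        digest.update(path.read_bytes())
    return digest.hexdigest()
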
Example #3
    @classmethod
    def from_byte_array(cls, unit_bytes, **shared):
        """Load a :class:`ProcessingUnit` instance from a bytearray

        Args:
            unit_bytes (bytearray): A bytearray representing a zipped
                processing unit.
        """
        cleaned_unit_name = _sanitize_unit_name(cls.unit_name)
        with temp_dir() as tmp_dir:
            file_io = io.BytesIO(unit_bytes)
            unzip_archive(file_io, str(tmp_dir))
            processing_unit = cls.from_path(tmp_dir / cleaned_unit_name,
                                            **shared)
        return processing_unit
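
The method depends on the unzip_archive utility; its name and signature are taken from the call site above, and a minimal standard-library equivalent (an assumption, not the project's actual code) would be:

import zipfile

def unzip_archive(file_like, destination):
    # Extract every member of the zip archive into the destination directory
    with zipfile.ZipFile(file_like) as archive:
        archive.extractall(destination)
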
Example #4
def _build_builtin_parser(language, gazetteer_entities):
    with temp_dir() as serialization_dir:
        gazetteer_entity_parser = None
        if gazetteer_entities:
            gazetteer_entity_parser = _build_gazetteer_parser(
                serialization_dir, gazetteer_entities, language)

        metadata = {
            "language": language.upper(),
            "gazetteer_parser": gazetteer_entity_parser
        }
        metadata_path = serialization_dir / "metadata.json"
        with metadata_path.open("w", encoding="utf-8") as f:
            f.write(json_string(metadata))
        parser = _BuiltinEntityParser.from_path(serialization_dir)
        return BuiltinEntityParser(parser)
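
json_string is another snips-nlu utility, used here to write metadata.json. The call site only tells us it turns a dict into a JSON string; a plausible stand-in (assumed defaults, for illustration only) is:

import json

def json_string(json_object, indent=2, sort_keys=True):
    # Deterministic, human-readable JSON; a stable key order also keeps
    # persisted files byte-identical across runs, which the dirhash-based
    # reproducibility tests above depend on
    return json.dumps(json_object, indent=indent, sort_keys=sort_keys)
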
Example #5
    def test_training_should_be_reproducible(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        utterances = [
            text_to_utterance("please make me two hots cups of tea"),
            text_to_utterance("i want a cup of coffee"),
        ]
        classes = np.array([0, 1])
        shared = self.get_shared_data(dataset)
        shared["random_state"] = 42

        # When
        featurizer1 = Featurizer(**shared)
        featurizer1.fit(dataset, utterances, classes, max(classes))

        featurizer2 = Featurizer(**shared)
        featurizer2.fit(dataset, utterances, classes, max(classes))

        # Then
        with temp_dir() as tmp_dir:
            dir_featurizer1 = tmp_dir / "featurizer1"
            dir_featurizer2 = tmp_dir / "featurizer2"
            featurizer1.persist(dir_featurizer1)
            featurizer2.persist(dir_featurizer2)
            hash1 = dirhash(str(dir_featurizer1), 'sha256')
            hash2 = dirhash(str(dir_featurizer2), 'sha256')
            self.assertEqual(hash1, hash2)
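
Here the utterances passed to Featurizer.fit come from text_to_utterance, which wraps raw text in the utterance structure used throughout snips-nlu. A minimal stand-in consistent with the single-chunk "data" layout of the dataset JSON (a sketch, not necessarily the library's exact helper) is:

def text_to_utterance(text):
    # One utterance = a "data" list of chunks; plain text is a single chunk
    return {"data": [{"text": text}]}
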
Example #6
    def to_byte_array(self):
        """Serialize the :class:`ProcessingUnit` instance into a bytearray

        This method persists the processing unit in a temporary directory,
        zips the directory and returns the zipped file as binary data.

        Returns:
            bytearray: the processing unit as bytearray data
        """
        cleaned_unit_name = _sanitize_unit_name(self.unit_name)
        with temp_dir() as tmp_dir:
            processing_unit_dir = tmp_dir / cleaned_unit_name
            self.persist(processing_unit_dir)
            archive_base_name = tmp_dir / cleaned_unit_name
            archive_name = archive_base_name.with_suffix(".zip")
            shutil.make_archive(
                base_name=str(archive_base_name), format="zip",
                root_dir=str(tmp_dir), base_dir=cleaned_unit_name)
            with archive_name.open(mode="rb") as f:
                processing_unit_bytes = bytearray(f.read())
        return processing_unit_bytes
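
Together with from_byte_array (Example #3), this gives a full in-memory round trip: no files need to survive outside the temporary directory. A hedged usage sketch, assuming a dataset dict built as in the earlier examples:

from snips_nlu import SnipsNLUEngine

# "dataset" is assumed to be a snips-nlu dataset dict, e.g. the result of
# Dataset.from_yaml_files("en", [dataset_stream]).json as shown above
engine = SnipsNLUEngine()
engine.fit(dataset)

# Serialize the trained engine to bytes, e.g. for storage in a database
engine_bytes = engine.to_byte_array()

# Rebuild an equivalent engine from the raw bytes and query it directly
restored_engine = SnipsNLUEngine.from_byte_array(engine_bytes)
parsing = restored_engine.parse("make me two cups of coffee")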