def test_training_should_be_reproducible(self):
    """Fitting two engines with the same random_state on the same dataset
    must yield byte-identical persisted artifacts."""
    # Given
    seed = 42
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a hot cup of tea
- make me five tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me one cup of coffee please
- brew two cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # When: train two engines seeded identically
    first_engine = SnipsNLUEngine(random_state=seed)
    first_engine.fit(dataset)
    second_engine = SnipsNLUEngine(random_state=seed)
    second_engine.fit(dataset)

    # Then: compare directory hashes of the two persisted engines
    with temp_dir() as tmp_dir:
        first_dir = tmp_dir / "engine1"
        second_dir = tmp_dir / "engine2"
        first_engine.persist(first_dir)
        second_engine.persist(second_dir)
        first_hash = dirhash(str(first_dir), 'sha256')
        second_hash = dirhash(str(second_dir), 'sha256')
        self.assertEqual(first_hash, second_hash)
def test_training_should_be_reproducible(self):
    """Fitting two LogRegIntentClassifier instances with the same
    random_state on the same dataset must persist identically."""
    # Given
    seed = 40
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # When: train two classifiers seeded identically
    first_clf = LogRegIntentClassifier(random_state=seed)
    first_clf.fit(dataset)
    second_clf = LogRegIntentClassifier(random_state=seed)
    second_clf.fit(dataset)

    # Then: compare directory hashes of the persisted classifiers
    with temp_dir() as tmp_dir:
        first_dir = tmp_dir / "classifier1"
        second_dir = tmp_dir / "classifier2"
        first_clf.persist(first_dir)
        second_clf.persist(second_dir)
        first_hash = dirhash(str(first_dir), 'sha256')
        second_hash = dirhash(str(second_dir), 'sha256')
        self.assertEqual(first_hash, second_hash)
def from_byte_array(cls, unit_bytes, **shared):
    """Load a :class:`ProcessingUnit` instance from a bytearray

    Args:
        unit_bytes (bytearray): A bytearray representing a zipped
            processing unit.
    """
    unit_dir_name = _sanitize_unit_name(cls.unit_name)
    with temp_dir() as tmp_dir:
        # Extract the zipped unit into a scratch directory, then
        # deserialize it from the extracted sub-directory.
        archive_io = io.BytesIO(unit_bytes)
        unzip_archive(archive_io, str(tmp_dir))
        return cls.from_path(tmp_dir / unit_dir_name, **shared)
def _build_builtin_parser(language, gazetteer_entities):
    """Build a :class:`BuiltinEntityParser` for *language*, optionally
    backed by a gazetteer parser created from *gazetteer_entities*."""
    with temp_dir() as serialization_dir:
        # Only build a gazetteer parser when gazetteer entities were given
        if gazetteer_entities:
            gazetteer_parser = _build_gazetteer_parser(
                serialization_dir, gazetteer_entities, language)
        else:
            gazetteer_parser = None
        metadata = {
            "language": language.upper(),
            "gazetteer_parser": gazetteer_parser,
        }
        metadata_path = serialization_dir / "metadata.json"
        with metadata_path.open("w", encoding="utf-8") as f:
            f.write(json_string(metadata))
        # Deserialize the low-level parser from the directory we just wrote
        inner_parser = _BuiltinEntityParser.from_path(serialization_dir)
        return BuiltinEntityParser(inner_parser)
def test_training_should_be_reproducible(self):
    """Fitting two Featurizer instances with the same random_state on the
    same data must persist to byte-identical artifacts."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    utterances = [
        text_to_utterance("please make me two hots cups of tea"),
        text_to_utterance("i want a cup of coffee"),
    ]
    classes = np.array([0, 1])
    shared = self.get_shared_data(dataset)
    shared["random_state"] = 42

    # When: fit two featurizers with identical shared config and seed
    first_featurizer = Featurizer(**shared)
    first_featurizer.fit(dataset, utterances, classes, max(classes))
    second_featurizer = Featurizer(**shared)
    second_featurizer.fit(dataset, utterances, classes, max(classes))

    # Then: compare directory hashes of the persisted featurizers
    with temp_dir() as tmp_dir:
        first_dir = tmp_dir / "featurizer1"
        second_dir = tmp_dir / "featurizer2"
        first_featurizer.persist(first_dir)
        second_featurizer.persist(second_dir)
        first_hash = dirhash(str(first_dir), 'sha256')
        second_hash = dirhash(str(second_dir), 'sha256')
        self.assertEqual(first_hash, second_hash)
def to_byte_array(self):
    """Serialize the :class:`ProcessingUnit` instance into a bytearray

    This method persists the processing unit in a temporary directory, zip
    the directory and return the zipped file as binary data.

    Returns:
        bytearray: the processing unit as bytearray data
    """
    unit_dir_name = _sanitize_unit_name(self.unit_name)
    with temp_dir() as tmp_dir:
        # Persist into <tmp_dir>/<unit_dir_name>, then zip that directory
        archive_base = tmp_dir / unit_dir_name
        self.persist(archive_base)
        shutil.make_archive(
            base_name=str(archive_base),
            format="zip",
            root_dir=str(tmp_dir),
            base_dir=unit_dir_name)
        zip_path = archive_base.with_suffix(".zip")
        with zip_path.open(mode="rb") as f:
            return bytearray(f.read())