def create_nlp_model() -> SnipsNLUEngine:
    """
    Trains a new NLU model from the given dataset, then saves the model in
    the root directory of the project under the name: nlumodel

    This function is only called once, at the start of the program, if the
    nlumodel directory is not detected in the current directory.

    Parameters required: None
    Return data: Trained SnipsNLUEngine object
    """
    # Creating a barebones engine
    engine = SnipsNLUEngine(config=CONFIG_EN)

    # Creating dataset from yaml files present in nlputrain directory
    data = dataset.Dataset.from_yaml_files(
        "en",
        ["./nlputrain/" + i for i in os.listdir("./nlputrain/") if ".yaml" in i]
    )

    # Training the engine with the given dataset
    engine.fit(data)

    # Persisting the engine so it can be reused later;
    # the persisted engine is saved in the nlumodel folder
    try:
        engine.persist("nlumodel")
    except PersistingError:
        print("Old NLP file still exists. Deleting..")
        # Removing old model files using shutil
        shutil.rmtree("nlumodel")
        engine.persist("nlumodel")

    print("NLP model has been created and saved in directory: nlumodel")

    # Returning trained engine
    return engine
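# A possible companion loader (a sketch, not part of the snippet above): it
# reuses the persisted "nlumodel" directory when present and only retrains
# otherwise. SnipsNLUEngine.from_path() is the standard snips-nlu way to
# restore a persisted engine; create_nlp_model() is the function defined above.
import os

from snips_nlu import SnipsNLUEngine


def load_or_create_nlp_model() -> SnipsNLUEngine:
    """Hypothetical helper: load the persisted engine, or train a new one."""
    if os.path.isdir("nlumodel"):
        # Restore the engine persisted by create_nlp_model()
        return SnipsNLUEngine.from_path("nlumodel")
    return create_nlp_model()


# engine = load_or_create_nlp_model()
# print(engine.parse("some test utterance"))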
class SnipsService(ApiService):
    def __init__(self, classes, model_path=None, max_api_calls=None, verbose=False):
        super().__init__(classes, max_api_calls, verbose)
        load_resources('en')
        if model_path:
            self.load_model(model_path)
        else:
            self.engine = SnipsNLUEngine(config=CONFIG_EN)

    def train_model(self, dataset):
        self.engine.fit(dataset)

    def train_model_from_file(self, dataset_path):
        with io.open(dataset_path) as f:
            self.train_model(json.load(f))

    def save_model(self, model_path):
        self.engine.persist(model_path)

    def load_model(self, model_path):
        self.engine = SnipsNLUEngine.from_path(model_path)

    def predict(self, utterance):
        result = self.engine.parse(utterance)
        try:
            return result['intent']['intentName']
        except Exception as e:
            print('ERR:', e)
            print('Failed to parse: "{}"'.format(utterance))
            print(result)
            return None
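# Hypothetical usage of SnipsService above; ApiService and the semantics of
# `classes` come from the surrounding project, so they are assumptions here.
service = SnipsService(classes=["MakeTea", "MakeCoffee"])
service.train_model_from_file("dataset.json")  # JSON from snips-nlu generate-dataset
service.save_model("snips_model")              # reloadable via SnipsNLUEngine.from_path
print(service.predict("make me two cups of tea"))  # -> intent name, or None on failure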
def save_engine(engine: SnipsNLUEngine, path: str) -> None:
    """Save a trained snips nlu engine in a tar.gz archive."""
    with tarfile.open(path, "w:gz") as archive:
        with TemporaryDirectory() as tmp:
            # Persist to a temp directory first, then add it to the archive
            # (a separate name avoids shadowing the `path` argument)
            engine_dir = os.path.join(tmp, "engine")
            logger.debug("Saving engine to path %s", engine_dir)
            engine.persist(engine_dir)
            archive.add(engine_dir, arcname="engine")
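# A plausible inverse of save_engine (a sketch under the same assumptions:
# the archive contains a directory stored with arcname="engine"). The engine
# is fully loaded into memory before the temporary directory is cleaned up.
import os
import tarfile
from tempfile import TemporaryDirectory

from snips_nlu import SnipsNLUEngine


def load_engine(path: str) -> SnipsNLUEngine:
    """Load a trained snips nlu engine from a tar.gz archive."""
    with tarfile.open(path, "r:gz") as archive:
        with TemporaryDirectory() as tmp:
            archive.extractall(tmp)
            return SnipsNLUEngine.from_path(os.path.join(tmp, "engine"))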
def train_eval_snips_nlu_model(lang='en', cross=False, save=''):
    """
    Train Snips data from all brat annotation objects.

    :param lang: abbreviated language name
    :param cross: whether to run train/test evaluation instead of training a model
    :param save: version suffix used in the paths where results and models are saved
    :return: None
    :rtype: None
    """
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN
    from snips_nlu_metrics import compute_train_test_metrics, compute_cross_val_metrics
    import pickle
    import json

    if cross:
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_snips_data_task1()
        print("--> Evaluating training data with Snips metrics...")
        filename_results = source_result / "snips_semeval_2020_evaluation_task1_{}.pkl".format(save)
        if not Path(filename_results).exists():
            tt_metrics = compute_train_test_metrics(train_dataset=train_data[0],
                                                    test_dataset=train_data[1],
                                                    engine_class=SnipsNLUEngine,
                                                    include_slot_metrics=False)
            # print(tt_metrics)
            if not Path(filename_results).exists():
                print("--> Writing snips nlu metrics data to file...")
                with codecs.open(filename_results, 'wb') as metric:
                    pickle.dump(tt_metrics, metric)
                from datetime import datetime
                dmtime = "_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
                name = "snips_semeval_2020_evaluation_task1{}.json".format(dmtime)
                filename_results_json = source_result / name
                with codecs.open(filename_results_json, 'w', "utf-8") as m_json:
                    json.dump(tt_metrics, m_json)
    else:
        filename_results = source_result / "snips_semeval_2020_model_task1_{}".format(save)
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_snips_data_task1()
        nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
        print("--> Training patent data with Snips...")
        nlu_engine.fit(train_data)
        try:
            print("--> Saving model trained with Snips (JOBLIB)...")
            filename_joblib = source_result / "snips_semeval_2020_model_task1_{}.pkl".format(save)
            with codecs.open(filename_joblib, 'wb') as metric:
                pickle.dump(nlu_engine, metric)
        except Exception:
            pass
        print("--> Saving model trained with Snips (SNIPS)...")
        try:
            nlu_engine.persist(filename_results)
        except Exception:
            pass
def test_parse(self):
    # Given / When
    dataset_stream = io.StringIO(u"""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- brew [number_of_cups:snips/number](one) cup of coffee please
- make me [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    nlu_engine = SnipsNLUEngine().fit(dataset)
    nlu_engine.persist(self.tmp_file_path)

    # When / Then
    output_target = io.StringIO()
    with self.fail_if_exception("Failed to parse using CLI script"):
        with redirect_stdout(output_target):
            parse(str(self.tmp_file_path), "Make me two cups of coffee")
    output = output_target.getvalue()

    # Then
    expected_output = """{
  "input": "Make me two cups of coffee",
  "intent": {
    "intentName": "MakeCoffee",
    "probability": 1.0
  },
  "slots": [
    {
      "entity": "snips/number",
      "range": {
        "end": 11,
        "start": 8
      },
      "rawValue": "two",
      "slotName": "number_of_cups",
      "value": {
        "kind": "Number",
        "value": 2.0
      }
    }
  ]
}
"""
    self.assertEqual(expected_output, output)
def train(dataset_file_path, train_directory):
    with io.open(dataset_file_path) as f:
        dataset = json.load(f)

    language = dataset.get("language", None)
    config = language_configs.get(language, None)
    if config is None:
        # Note: the dataset is a dict, so interpolate the extracted
        # `language` value rather than the (invalid) `dataset.language`
        raise Exception(
            f"No language configuration for language {language}")

    nlu_engine = SnipsNLUEngine(config=config)
    nlu_engine.fit(dataset)
    nlu_engine.persist(train_directory)
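# `language_configs` is not shown above; a minimal sketch of what it could
# look like, mapping dataset language codes to the default configs shipped
# with snips-nlu (extend with any language snips-nlu has resources for).
from snips_nlu.default_configs import CONFIG_DE, CONFIG_EN, CONFIG_FR

language_configs = {
    "en": CONFIG_EN,
    "de": CONFIG_DE,
    "fr": CONFIG_FR,
}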
def main():
    """
    Builds a training dataset, trains an NLU engine with it, and saves that
    engine. This must be done any time a new command is added or utterances
    are edited for a command. If an NLU engine already exists, it is deleted.
    """
    training_json = json.loads(build_training_dataset())
    engine_path = os.path.join('Voithos', 'utilities', 'NLU')
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine = nlu_engine.fit(training_json)
    try:
        nlu_engine.persist(engine_path)
    except PersistingError:
        shutil.rmtree(engine_path)
        nlu_engine.persist(engine_path)
def test_engine_with_keyword_slot_filler_should_be_serializable(self):
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: SetLightColor
utterances:
- set the light to [color](blue) in the [room](kitchen)
- please make the lights [color](red) in the [room](bathroom)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    intent = "SetLightColor"
    slot_filler_config = {
        "unit_name": "keyword_slot_filler",
        "lowercase": True
    }
    parser_config = ProbabilisticIntentParserConfig(
        slot_filler_config=slot_filler_config)
    engine_config = NLUEngineConfig([parser_config])
    engine = SnipsNLUEngine(engine_config).fit(dataset, intent)
    engine.persist(self.tmp_file_path)
    text = "I want Red lights in the kitchen now"

    # When
    loaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
    res = loaded_engine.parse(text)

    # Then
    expected_slots = [
        custom_slot(
            unresolved_slot(match_range={START: 7, END: 10},
                            value="Red",
                            entity="color",
                            slot_name="color"),
            "red"),
        custom_slot(
            unresolved_slot(match_range={START: 25, END: 32},
                            value="kitchen",
                            entity="room",
                            slot_name="room"))
    ]
    self.assertListEqual(expected_slots, res["slots"])
def train(dataset_path, output_path, config_path):
    """Train an NLU engine on the provided dataset"""
    with Path(dataset_path).open("r", encoding="utf8") as f:
        dataset = json.load(f)

    config = None
    if config_path is not None:
        with Path(config_path).open("r", encoding="utf8") as f:
            config = json.load(f)

    load_resources(dataset["language"])

    print("Create and train the engine...")
    engine = SnipsNLUEngine(config).fit(dataset)

    print("Persisting the engine...")
    engine.persist(output_path)

    print("Saved the trained engine to %s" % output_path)
def train_nlu():
    with io.open("training_data/dataset.json") as f:
        sample_dataset = json.load(f)

    nlu_engine = SnipsNLUEngine()
    print("Snips training started")
    train_start = datetime.datetime.now()
    nlu_engine = nlu_engine.fit(sample_dataset)

    # Remove any previously persisted model before saving the new one
    # (both branches of the original code called persist, so the check
    # only guards the rmtree)
    if os.path.exists(model_path):
        shutil.rmtree(model_path, ignore_errors=True)
    nlu_engine.persist(model_path)

    train_end = datetime.datetime.now()
    print("Total time to train the Snips model: {0}".format(train_end - train_start))
def train(dataset_path, output_path, config_path, verbose):
    """Train an NLU engine on the provided dataset"""
    if verbose:
        set_nlu_logger(logging.DEBUG)

    with Path(dataset_path).open("r", encoding="utf8") as f:
        dataset = json.load(f)

    config = None
    if config_path is not None:
        with Path(config_path).open("r", encoding="utf8") as f:
            config = json.load(f)

    print("Create and train the engine...")
    engine = SnipsNLUEngine(config).fit(dataset)

    print("Persisting the engine...")
    engine.persist(output_path)

    print("Saved the trained engine to %s" % output_path)
def train(dataset_path, output_path, config_path=None, verbose=False,
          random_seed=None):
    """Train an NLU engine on the provided dataset"""
    import json
    import logging
    from pathlib import Path

    from snips_nlu import SnipsNLUEngine
    from snips_nlu.cli.utils import set_nlu_logger
    from snips_nlu.common.utils import check_random_state

    if verbose == 1:
        set_nlu_logger(logging.INFO)
    elif verbose >= 2:
        set_nlu_logger(logging.DEBUG)

    with Path(dataset_path).open("r", encoding="utf8") as f:
        dataset = json.load(f)

    config = None
    if config_path is not None:
        with Path(config_path).open("r", encoding="utf8") as f:
            config = json.load(f)

    random_state = check_random_state(random_seed)

    print("Create and train the engine...")
    engine = SnipsNLUEngine(config, random_state=random_state).fit(dataset)

    print("Persisting the engine...")
    engine.persist(output_path)

    print("Saved the trained engine to %s" % output_path)
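# Passing a fixed random_seed makes training deterministic, so repeated runs
# on the same dataset produce identical engines; useful for reproducible
# benchmarks. Hypothetical invocation (paths are placeholders):
#
#     train("dataset.json", "engine_a", random_seed=42)
#     train("dataset.json", "engine_b", random_seed=42)  # same artifacts as engine_a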
def init(seed):
    with io.open("Dataset/dataset.json", encoding="utf8") as f:
        sample_dataset = json.load(f)

    print("initialising Lynda")
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN

    nlu_engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed)
    # Training the NLU model
    nlu_engine.fit(sample_dataset)
    print("model created")
    # Saving the trained model
    nlu_engine.persist('model')
    print("model dumped")
import os
import json
import shutil

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

seed = 42
engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed)

dataset_name = 'dataset1.json'

# Locate the project root ('NLU') from the current working directory
get_current_working_directory = os.getcwd()
splitted_current_working_directory = get_current_working_directory.split(os.sep)
# print(splitted_current_working_directory)
directory_index = splitted_current_working_directory.index('NLU')
# print(directory_index)
root_directory_path = os.sep.join(
    splitted_current_working_directory[:directory_index + 1])
# print(root_directory_path)
dataset_path = os.path.join(root_directory_path, 'dataset', 'json', dataset_name)

with open(dataset_path, encoding='utf-16', errors='ignore') as f:
    dataset = json.load(f, strict=False)

print("TRAINING THE ENGINE...")
engine.fit(dataset)

# Remove any previously trained model before persisting the new one
# (ignore_errors avoids a crash on the very first run, when no model exists)
shutil.rmtree(os.path.join(root_directory_path, 'trained_model'),
              ignore_errors=True)
engine.persist(os.path.join(root_directory_path, 'trained_model'))
print("ENGINE TRAINED.")
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

# snips-nlu generate-dataset en ticket.yaml > ticket.json

# The first step is to understand which intent the sentence is about.
# The second step is to extract the parameters, a.k.a. the slots of the sentence.

if __name__ == "__main__":
    engine = SnipsNLUEngine(config=CONFIG_EN, random_state=233)

    with io.open("dataset/ticket.json") as f:
        dataset = json.load(f)

    # Train model.
    engine.fit(dataset)

    # Save model.
    engine.persist("snips.model")

    # # Test
    # parsing = engine.parse("Can I get 2 tickets for the big short?")
    # print(json.dumps(parsing, indent=2))
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_DE

engine = SnipsNLUEngine(config=CONFIG_DE)

with io.open("dataset.json") as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("QBo_Model")
def train(
    sentences_dict: typing.Dict[str, str],
    language: str,
    slots_dict: typing.Optional[typing.Dict[str, typing.List[str]]] = None,
    engine_path: typing.Optional[typing.Union[str, Path]] = None,
    dataset_path: typing.Optional[typing.Union[str, Path]] = None,
) -> SnipsNLUEngine:
    """Generate Snips YAML dataset from Rhasspy sentences/slots."""
    slots_dict = slots_dict or {}

    _LOGGER.debug("Creating Snips engine for language %s", language)
    engine = SnipsNLUEngine(config=DEFAULT_CONFIGS[language])

    # Parse JSGF sentences
    _LOGGER.debug("Parsing sentences")
    with io.StringIO() as ini_file:
        # Join as single ini file
        for lines in sentences_dict.values():
            print(lines, file=ini_file)
            print("", file=ini_file)

        intents = rhasspynlu.parse_ini(ini_file.getvalue())

    # Split into sentences and rule/slot replacements
    sentences, replacements = rhasspynlu.ini_jsgf.split_rules(intents)

    for intent_sentences in sentences.values():
        for sentence in intent_sentences:
            rhasspynlu.jsgf.walk_expression(sentence,
                                            rhasspynlu.number_range_transform,
                                            replacements)

    # Convert to directed graph *without* expanding slots
    # (e.g., $rhasspy/number)
    _LOGGER.debug("Converting to intent graph")
    intent_graph = rhasspynlu.sentences_to_graph(sentences,
                                                 replacements=replacements,
                                                 expand_slots=False)

    # Get start/end nodes for graph
    start_node, end_node = rhasspynlu.jsgf_graph.get_start_end_nodes(intent_graph)
    assert (start_node is not None) and (end_node is not None), "Missing start/end node(s)"

    if dataset_path:
        # Use user file
        dataset_file = open(dataset_path, "w+")
    else:
        # Use temporary file
        dataset_file = typing.cast(
            typing.TextIO,
            tempfile.NamedTemporaryFile(suffix=".yml", mode="w+"))
        dataset_path = dataset_file.name

    with dataset_file:
        _LOGGER.debug("Writing YAML dataset to %s", dataset_path)

        # Walk first layer of edges with intents
        for _, intent_node, edge_data in intent_graph.edges(start_node, data=True):
            intent_name: str = edge_data["olabel"][9:]

            # New intent
            print("---", file=dataset_file)
            print("type: intent", file=dataset_file)
            print("name:", quote(intent_name), file=dataset_file)
            print("utterances:", file=dataset_file)

            # Get all paths through the graph (utterances)
            used_utterances: typing.Set[str] = set()
            paths = nx.all_simple_paths(intent_graph, intent_node, end_node)
            for path in paths:
                utterance = []
                entity_name = None
                slot_name = None
                slot_value = None

                # Walk utterance edges
                for from_node, to_node in rhasspynlu.utils.pairwise(path):
                    edge_data = intent_graph.edges[(from_node, to_node)]
                    ilabel = edge_data.get("ilabel")
                    olabel = edge_data.get("olabel")
                    if olabel:
                        if olabel.startswith("__begin__"):
                            slot_name = olabel[9:]
                            entity_name = None
                            slot_value = ""
                        elif olabel.startswith("__end__"):
                            if entity_name == "rhasspy/number":
                                # Transform to Snips number
                                entity_name = "snips/number"
                            elif not entity_name:
                                # Collect actual value
                                assert (
                                    slot_name and slot_value
                                ), f"No slot name or value (name={slot_name}, value={slot_value})"

                                entity_name = slot_name
                                slot_values = slots_dict.get(slot_name)
                                if not slot_values:
                                    slot_values = []
                                    slots_dict[slot_name] = slot_values

                                slot_values.append(slot_value.strip())

                            # Reference slot/entity (values will be added later)
                            utterance.append(f"[{slot_name}:{entity_name}]")

                            # Reset current slot/entity
                            entity_name = None
                            slot_name = None
                            slot_value = None
                        elif olabel.startswith("__source__"):
                            # Use Rhasspy slot name as entity
                            entity_name = olabel[10:]

                    if ilabel:
                        # Add to current slot/entity value
                        if slot_name and (not entity_name):
                            slot_value += ilabel + " "
                        else:
                            # Add directly to utterance
                            utterance.append(ilabel)
                    elif (olabel
                          and (not olabel.startswith("__"))
                          and slot_name
                          and (not slot_value)
                          and (not entity_name)):
                        slot_value += olabel + " "

                if utterance:
                    utterance_str = " ".join(utterance)
                    if utterance_str not in used_utterances:
                        # Write utterance
                        print(" -", quote(utterance_str), file=dataset_file)
                        used_utterances.add(utterance_str)

            print("", file=dataset_file)

        # Write entities
        for slot_name, values in slots_dict.items():
            if slot_name.startswith("$"):
                # Remove arguments and $
                slot_name = slot_name.split(",")[0][1:]

            # Skip numbers
            if slot_name in {"rhasspy/number"}:
                # Should have been converted already to snips/number
                continue

            # Keep only unique values
            values_set = set(values)

            print("---", file=dataset_file)
            print("type: entity", file=dataset_file)
            print("name:", quote(slot_name), file=dataset_file)
            print("values:", file=dataset_file)

            slot_graph = rhasspynlu.sentences_to_graph({
                slot_name: [
                    rhasspynlu.jsgf.Sentence.parse(value)
                    for value in values_set
                ]
            })

            start_node, end_node = rhasspynlu.jsgf_graph.get_start_end_nodes(slot_graph)
            n_data = slot_graph.nodes(data=True)
            for path in nx.all_simple_paths(slot_graph, start_node, end_node):
                words = []
                for node in path:
                    node_data = n_data[node]
                    word = node_data.get("word")
                    if word:
                        words.append(word)

                if words:
                    print(" -", quote(" ".join(words)), file=dataset_file)

            print("", file=dataset_file)

        # ------------
        # Train engine
        # ------------

        if engine_path:
            # Delete existing engine
            engine_path = Path(engine_path)
            engine_path.parent.mkdir(exist_ok=True)

            if engine_path.is_dir():
                # Snips will fail if the directory exists
                _LOGGER.debug("Removing existing engine at %s", engine_path)
                shutil.rmtree(engine_path)
            elif engine_path.is_file():
                _LOGGER.debug("Removing unexpected file at %s", engine_path)
                engine_path.unlink()

        _LOGGER.debug("Training engine")
        dataset_file.seek(0)
        dataset = Dataset.from_yaml_files(language, [dataset_file])
        engine = engine.fit(dataset)

    if engine_path:
        # Save engine
        engine.persist(engine_path)
        _LOGGER.debug("Engine saved to %s", engine_path)

    return engine
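# Hypothetical invocation of the Rhasspy train() above: sentences_dict maps
# arbitrary keys to sentences.ini-style JSGF text, and the language must have
# an entry in DEFAULT_CONFIGS. A sketch only; not part of the original module.
sentences = {
    "default": "[GetTime]\nwhat time is it\ntell me the time"
}
engine = train(sentences, "en", engine_path="snips-engine")
print(engine.parse("what time is it"))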
class SnipsNluTrainer:
    """Class to train Snips NLU with training data from Cloudant DB, with rollback support."""

    def __init__(self, database_context, cos_context):
        # Cloudant DB
        self.context = database_context
        self.training_data = {}
        self.cos_context = cos_context
        self._check_trainer_dir(ENGINE_PATH_ZIP)
        load_resources("de")
        load_resources("en")
        self.nlu_engine = SnipsNLUEngine()

    def start_training(self):
        self._load_training_data()
        self._train_nlu()
        result = self._persist_nlu()
        return result

    def rollback_nlu(self):
        result = False
        if not ENGINE_PATH_NEW.exists():
            print("No backups exist locally..")
            if not self.cos_context.file_exist_in_bucket(OLD_ENGINE_NAME_ZIP):
                print("There are no backups in the bucket..")
                print("Data rollback is not possible!")
            else:
                print("Found saved backups in bucket..")
                self._load_from_bucket(ENGINE_PATH_ZIP, OLD_ENGINE_NAME_ZIP,
                                       ENGINE_PATH_ZIP)
                print("Restored backup from bucket to '{0}'".format(ENGINE_PATH_ZIP))
                self.rollback_nlu()
        else:
            loaded_engine = SnipsNLUEngine.from_path(ENGINE_PATH_NEW)
            self.nlu_engine = loaded_engine
            # Remove new/old local nlu folders. Save backup as new engine
            # shutil.rmtree(ENGINE_PATH_NEW)
            # shutil.rmtree(ENGINE_PATH_OLD)
            result = self._persist_nlu()
            print("Engine rollback was successful")
        return result

    def get_nlu_engine(self):
        if not ENGINE_PATH_NEW.exists():
            print("No engine found locally...")
            print("Searching in bucket...")
            if not self.cos_context.file_exist_in_bucket(NEW_ENGINE_NAME_ZIP):
                print("There is no engine in the bucket!")
                print("Engine must be fitted! Please run 'start training'")
                return ""
            else:
                print("Found saved engine in bucket..")
                self._load_from_bucket(ENGINE_PATH_ZIP, NEW_ENGINE_NAME_ZIP,
                                       ENGINE_PATH_ZIP)
                print("Restored saved engine from bucket to '{0}'".format(
                    ENGINE_PATH_ZIP))
                self.get_nlu_engine()
        else:
            loaded_engine = SnipsNLUEngine.from_path(ENGINE_PATH_NEW)
            self.nlu_engine = loaded_engine
            print("Success! Engine was fitted...")
        return self.nlu_engine

    def _load_training_data(self):
        self.training_data = self.context.get_trainings_data()
        if self.training_data == "":
            print("There is no training data!")
        else:
            print("Training data was loaded successfully")

    def _train_nlu(self):
        self.nlu_engine.fit(self.training_data)
        print("Engine was trained successfully")

    def _persist_nlu(self):
        result = False
        # First save-engine attempt
        if not ENGINE_PATH_NEW.exists():
            self.nlu_engine.persist(ENGINE_PATH_NEW)
            result = self._persist_to_bucket(ENGINE_PATH_NEW, ENGINE_PATH_ZIP,
                                             NEW_ENGINE_NAME_ZIP)
        else:
            # Remove & overwrite the old backup
            if ENGINE_PATH_OLD.exists():
                shutil.rmtree(ENGINE_PATH_OLD)
                self.cos_context.remove_file(OLD_ENGINE_NAME_ZIP)
                print("Overwrote old engine backup...")
            # Save (rename) the new engine as the old one, locally and in the bucket
            os.rename(ENGINE_PATH_NEW, ENGINE_PATH_OLD)
            self.cos_context.rename_file(NEW_ENGINE_NAME_ZIP, OLD_ENGINE_NAME_ZIP)
            # Create the new engine
            self.nlu_engine.persist(ENGINE_PATH_NEW)
            result = self._persist_to_bucket(ENGINE_PATH_NEW, ENGINE_PATH_ZIP,
                                             NEW_ENGINE_NAME_ZIP)
        if result:
            print("Engine was saved successfully")
        return result

    # Persist engine as zip to bucket to decrease up/download time
    # (5-6 MB raw vs 1.5 MB compressed)
    def _compress_engine(self, source, destination):
        base = os.path.basename(destination)
        name = base.split('.')[0]
        archive_format = base.split('.')[1]
        archive_from = os.path.dirname(source)
        archive_to = os.path.basename(source.strip(os.sep))
        print(source, destination, archive_from, archive_to)
        shutil.make_archive(name, archive_format, archive_from, archive_to)
        shutil.move('%s.%s' % (name, archive_format), destination)
        print("Engine was zipped...")

    def _decompress_engine(self, source, destination):
        zip_ref = zipfile.ZipFile(source, 'r')
        zip_ref.extractall(destination)
        print("Engine was unzipped..")

    # Engine folder -> zip -> save to IBM bucket
    def _persist_to_bucket(self, source, destination, file_name):
        # Python3 -> Python2 compatibility: pathlib Path to string
        source = str(source)
        destination = str(destination)
        file_name = str(file_name)
        self._compress_engine(source, destination + "/" + file_name)
        result = self.cos_context.upload_file(destination + "/" + file_name,
                                              file_name)
        return result

    # Download zipped engine -> save -> unzip it
    def _load_from_bucket(self, destination_zip, file_name, to_unzip_path):
        # Python3 -> Python2 compatibility: pathlib Path to string
        destination_zip = str(destination_zip)
        file_name = str(file_name)
        to_unzip_path = str(to_unzip_path)
        result = self.cos_context.download_file(destination_zip, file_name)
        if result:
            self._decompress_engine(destination_zip + "/" + file_name,
                                    to_unzip_path)
        return result

    def _check_trainer_dir(self, path):
        exist = os.path.isdir(path)
        if not exist:
            os.makedirs(path)
            exist = True
            print("Path '{0}' was created!".format(path))
        return exist
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN  # For German please use ..._DE

engine = SnipsNLUEngine(config=CONFIG_EN)

# dataset.json needs to be changed to the JSON you generated via YAML
with io.open("dataset.json") as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("path/to/directory")
class SnipsInterpreter(Interpreter):
    """Wraps the snips-nlu stuff to provide valuable information to an agent."""

    def __init__(self, lang: str, cache_directory: str = None,
                 trainings_store: TrainingsStore = None) -> None:
        """Instantiates a new Snips interpreter.

        Args:
          lang (str): Language used for this interpreter (ie. en, fr, ...)
          cache_directory (str): Path where training and trained files are placed
          trainings_store (TrainingsStore): Optional trainings store used when fitting the engine

        """
        super(SnipsInterpreter, self).__init__('snips', lang, cache_directory,
                                               trainings_store)
        self._engine = None
        self._slot_mappings = {}
        self._entities = {}

    def _configure(self) -> None:
        self._slot_mappings = self._engine.dataset_metadata.get(
            'slot_name_mappings', {})
        self._entities = self._engine.dataset_metadata.get(ENTITIES, {})
        self.intents = list(self._slot_mappings.keys())

    def load_from_cache(self) -> None:
        self._logger.info('Loading engine from "%s"', self.cache_directory)
        self._engine = SnipsNLUEngine.from_path(self.cache_directory)
        self._configure()

    def _check_and_install_resources_package(self) -> str:
        resource_pkg_name = f'snips_nlu_{self.lang}'
        if not importlib.util.find_spec(resource_pkg_name):
            self._logger.info(
                'Could not import resource package "%s", attempting installation',
                resource_pkg_name)
            try:
                subprocess.run(
                    [sys.executable, '-m', 'snips_nlu', 'download', self.lang],
                    check=True, stdout=subprocess.PIPE)
                self._logger.info('Successfully downloaded "%s"!', resource_pkg_name)
                # Reload resources (used by snips to determine if resources are installed)
                importlib.reload(pkg_resources)
            except:  # pragma: no cover pylint: disable=W0702
                self._logger.warning(
                    'Looks like it failed, you may have to do it manually with: '
                    '"python -m snips_nlu download %s"', self.lang)
        return resource_pkg_name

    def fit(self, data: dict) -> None:
        super().fit(data)

        data_lang = data.get('language')
        if data_lang != self.lang:  # pragma: no cover
            self._logger.warning(
                'Training language "%s" and interpreter language "%s" do not match, '
                'things could go badly', data_lang, self.lang)

        self._logger.info('Fitting using "snips v%s"', __version__)
        checksum = compute_checksum(data)
        cached_checksum = None

        # Try to load the cached checksum
        if self.cache_directory:
            cached_checksum_path = os.path.join(self.cache_directory,
                                                'trained.checksum')
            cached_checksum = read_file(cached_checksum_path, ignore_errors=True)

        if not cached_checksum:
            self._logger.debug('Checksum file not found')

        if checksum == cached_checksum:
            self.load_from_cache()
        else:
            config = None
            try:
                self._logger.info(
                    'Importing default configuration for language "%s"', self.lang)
                config = getattr(snips_confs, 'CONFIG_%s' % self.lang.upper())
            except AttributeError:
                self._logger.warning(
                    'Could not import default configuration, it will use the generic one instead')

            resource_pkg_name = self._check_and_install_resources_package()

            self._engine = SnipsNLUEngine(
                config, resources=load_resources(resource_pkg_name))
            self._engine.fit(data)

            if self.cache_directory:  # pragma: no cover
                self._logger.info('Persisting trained engine to "%s"',
                                  self.cache_directory)
                # Make sure the cache directory has been cleaned out
                rmtree(self.cache_directory, ignore_errors=True)
                self._engine.persist(self.cache_directory)

                with open(cached_checksum_path, mode='w') as file:
                    file.write(checksum)

            self._configure()

    @property
    def is_ready(self) -> bool:
        """Returns true if the interpreter is ready.

        Returns:
          bool: Ready or not

        """
        return self._engine and self._engine.fitted

    def parse(self, msg: str, scopes: List[str] = None) -> List[Intent]:
        if not self.is_ready:
            return []

        # TODO manage multiple intents in the same sentence
        parsed = self._engine.parse(msg, intents=scopes)

        if not parsed[RES_INTENT][RES_INTENT_NAME]:
            return []

        slots = {}
        for slot in parsed[RES_SLOTS]:
            name = slot[RES_SLOT_NAME]
            parsed_slot = slot[RES_VALUE]
            value = SlotValue(get_entity_value(parsed_slot), **slot)
            if name in slots:
                slots[name].append(value)
            else:
                slots[name] = [value]

        return [
            Intent(parsed[RES_INTENT][RES_INTENT_NAME], **slots),
        ]

    def parse_slot(self, intent: str, slot: str, msg: str) -> List[SlotValue]:
        if not self.is_ready:
            return []

        # Here I still use my own method to parse slots because it gives better
        # results in my benchmarks.
        #
        # However, we should keep an eye on https://github.com/snipsco/snips-nlu/pull/724
        # for when it becomes relevant. For now get_slots returns fewer results
        # than this homemade method below.
        entity_label = self._slot_mappings.get(intent, {}).get(slot)

        # No label, just return the given value
        if not entity_label:
            return [SlotValue(msg)]

        result = []

        # If it's a builtin entity, try to parse it
        if is_builtin_entity(entity_label):
            parsed = self._engine.builtin_entity_parser.parse(msg, [entity_label])

            for slot_data in parsed:
                # Here we move some keys to keep the returned meta consistent with
                # the parse above. We check whether `rawValue` is already present
                # because snips-nlu seems to keep a cache, so to avoid mutating the
                # same dict twice, we check this added key again.
                if RES_RAW_VALUE not in slot_data:
                    slot_data[RES_RAW_VALUE] = slot_data[RES_VALUE]
                    slot_data[RES_VALUE] = slot_data[RESOLVED_VALUE]
                    slot_data[ENTITY] = slot_data[ENTITY_KIND]

                result.append(
                    SlotValue(get_entity_value(slot_data[RES_VALUE]), **slot_data))
        else:
            parsed = self._engine.custom_entity_parser.parse(msg, [entity_label])

            # The custom parser did not find a match and the entity is extensible?
            # Just return the value.
            if not parsed and self._entities.get(entity_label,
                                                 {})[AUTOMATICALLY_EXTENSIBLE]:
                return [SlotValue(msg)]

            for slot_data in parsed:
                if RES_RAW_VALUE not in slot_data:
                    slot_data[RES_RAW_VALUE] = slot_data[RES_VALUE]
                    slot_data[RES_VALUE] = {
                        'kind': 'Custom',
                        RES_VALUE: slot_data[RESOLVED_VALUE],
                    }
                    slot_data[ENTITY] = slot_data[ENTITY_KIND]

                result.append(
                    SlotValue(get_entity_value(slot_data[RES_VALUE]), **slot_data))

        return result
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

# snips-nlu generate-dataset en dataset.yaml > dataset.json

engine = SnipsNLUEngine(config=CONFIG_EN)

with io.open('dataset.json') as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("model")
if lang == "fr":
    config = CONFIG_FR
if lang == "it":
    config = CONFIG_IT
if lang == "ja":
    config = CONFIG_JA
if lang == "ko":
    config = CONFIG_KO

add_entity(lang)
os.system("python3 -m snips_nlu download " + lang)
os.system("python3 -m snips_nlu generate-dataset " + lang + " "
          + lang + ".definition.yaml > definition.json")

try:
    shutil.rmtree(lang)
except Exception as e:
    print(e)

DATASET_PATH = Path(__file__).parent / "definition.json"
with DATASET_PATH.open(encoding="utf8") as f:
    sample_dataset = json.load(f)

nlu_engine = SnipsNLUEngine(config=config)
nlu_engine.fit(sample_dataset)
nlu_engine.persist(lang)
class SnipsInterpreter(Interpreter):
    """Wraps the snips-nlu stuff to provide valuable information to an agent."""

    def __init__(self, lang, cache_directory=None):
        """Instantiates a new Snips interpreter.

        Args:
          lang (str): Language used for this interpreter (ie. en, fr, ...)
          cache_directory (str): Path where training and trained files are placed

        """
        super(SnipsInterpreter, self).__init__('snips', lang, cache_directory)
        self._engine = None
        self._slot_mappings = {}
        self._entities = {}

    def _configure(self):
        self._slot_mappings = self._engine.dataset_metadata.get(
            'slot_name_mappings', {})
        self._entities = self._engine.dataset_metadata.get(ENTITIES, {})
        self.intents = list(self._slot_mappings.keys())

    def load_from_cache(self):
        self._logger.info('Loading engine from "%s"' % self.cache_directory)
        self._engine = SnipsNLUEngine.from_path(self.cache_directory)
        self._configure()

    def fit(self, data):
        data_lang = data.get('language')
        if data_lang != self.lang:
            self._logger.warning(
                'Training language "%s" and interpreter language "%s" do not match, '
                'things could go badly' % (data_lang, self.lang))

        self._logger.info('Fitting using "snips v%s"' % __version__)
        checksum = compute_checksum(data)
        cached_checksum = None

        # Try to load the cached checksum
        if self.cache_directory:
            cached_checksum_path = os.path.join(self.cache_directory,
                                                'trained.checksum')
            cached_checksum = read_file(cached_checksum_path, ignore_errors=True)

        if not cached_checksum:
            self._logger.debug('Checksum file not found')

        if checksum == cached_checksum:
            self.load_from_cache()
        else:
            config = None
            try:
                self._logger.info(
                    'Importing default configuration for language "%s"' % self.lang)
                config = getattr(snips_confs, 'CONFIG_%s' % self.lang.upper())
            except AttributeError:
                self._logger.warning(
                    'Could not import default configuration, it will use the generic one instead')

            self._engine = SnipsNLUEngine(
                config, resources=load_resources('snips_nlu_%s' % self.lang))
            self._engine.fit(data)

            if self.cache_directory:
                self._logger.info('Persisting trained engine to "%s"' %
                                  self.cache_directory)
                rmtree(self.cache_directory, ignore_errors=True)
                self._engine.persist(self.cache_directory)

                with open(cached_checksum_path, mode='w') as f:
                    f.write(checksum)

            self._configure()

    @property
    def is_ready(self):
        """Returns true if the interpreter is ready.

        Returns:
          bool: Ready or not

        """
        return self._engine and self._engine.fitted

    def parse(self, msg, scopes=None):
        if not self.is_ready:
            return []

        # TODO manage multiple intents in the same sentence
        parsed = self._engine.parse(msg, intents=scopes)

        if parsed[RES_INTENT][RES_INTENT_NAME] is None:
            return []

        slots = {}
        for slot in parsed[RES_SLOTS]:
            name = slot[RES_SLOT_NAME]
            parsed_slot = slot[RES_VALUE]
            value = SlotValue(get_entity_value(parsed_slot), **slot)
            if name in slots:
                slots[name].append(value)
            else:
                slots[name] = [value]

        return [
            Intent(parsed[RES_INTENT][RES_INTENT_NAME], **slots),
        ]

    def parse_slot(self, intent, slot, msg):
        if not self.is_ready:
            return []

        # Here I still use my own method to parse slots because it gives better
        # results in my benchmarks.
        #
        # However, we should keep an eye on https://github.com/snipsco/snips-nlu/pull/724
        # for when it becomes relevant. For now get_slots returns fewer results
        # than this homemade method below.
        entity_label = self._slot_mappings.get(intent, {}).get(slot)

        # No label, just return the given value
        if not entity_label:
            return [SlotValue(msg)]

        result = []

        # If it's a builtin entity, try to parse it
        if is_builtin_entity(entity_label):
            parsed = self._engine.builtin_entity_parser.parse(msg, [entity_label])

            for slot_data in parsed:
                # Here we move some keys to keep the returned meta consistent with
                # the parse above. We check whether `rawValue` is already present
                # because snips-nlu seems to keep a cache, so to avoid mutating the
                # same dict twice, we check this added key again.
                if RES_RAW_VALUE not in slot_data:
                    slot_data[RES_RAW_VALUE] = slot_data[RES_VALUE]
                    slot_data[RES_VALUE] = slot_data[RESOLVED_VALUE]
                    slot_data[ENTITY] = slot_data[ENTITY_KIND]

                result.append(
                    SlotValue(get_entity_value(slot_data[RES_VALUE]), **slot_data))
        else:
            parsed = self._engine.custom_entity_parser.parse(msg, [entity_label])

            # The custom parser did not find a match and the entity is extensible?
            # Just return the value.
            if not parsed and self._entities.get(entity_label,
                                                 {})[AUTOMATICALLY_EXTENSIBLE]:
                return [SlotValue(msg)]

            for slot_data in parsed:
                if RES_RAW_VALUE not in slot_data:
                    slot_data[RES_RAW_VALUE] = slot_data[RES_VALUE]
                    slot_data[RES_VALUE] = {
                        'kind': 'Custom',
                        RES_VALUE: slot_data[RESOLVED_VALUE],
                    }
                    slot_data[ENTITY] = slot_data[ENTITY_KIND]

                result.append(
                    SlotValue(get_entity_value(slot_data[RES_VALUE]), **slot_data))

        return result
import io
import json

from snips_nlu import SnipsNLUEngine

with io.open("dataset.json") as f:
    dataset = json.load(f)

engine = SnipsNLUEngine()
engine.fit(dataset)
engine.persist("../model")
"""
@author: yanni
"""
import io
import json

from snips_nlu import SnipsNLUEngine

path = '/Users/yanni/PycharmProjects/chatbot/src/'

### train Slots Detection Model
#!snips-nlu generate-dataset en {path}/Movie_intent.yaml {path}/Movie_entity.yaml > {path}/Movie_dataset.json
with io.open(path + 'Movie_dataset.json') as f:
    sample_dataset = json.load(f)

nlu_engine = SnipsNLUEngine()
nlu_engine.fit(sample_dataset)
nlu_engine.persist(path + 'Movie_Slots_Detection')

#!snips-nlu generate-dataset en {path}/Aspect_intent.yaml {path}/Aspect_entity.yaml > {path}/Aspect_dataset.json
with io.open(path + 'Aspect_dataset.json') as f:
    aspect_dataset = json.load(f)

nlu_engine = SnipsNLUEngine()
nlu_engine.fit(aspect_dataset)
nlu_engine.persist(path + 'Aspect_Slots_Detection')
""" This file is responsible for training and saving the SnipsNLU Engine """ import json from snips_nlu import SnipsNLUEngine from snips_nlu.default_configs import CONFIG_EN engine = SnipsNLUEngine(config=CONFIG_EN) with open("dataset.json") as f: dataset = json.load(f) engine.fit(dataset) engine.persist("persisted_engine")
from __future__ import unicode_literals, print_function

import io
import json

from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.default_configs import CONFIG_EN, CONFIG_DE

with io.open("lights_dataset_de.json") as f:
    sample_dataset = json.load(f)

# load_resources("en")
# nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
load_resources("de")
nlu_engine = SnipsNLUEngine(config=CONFIG_DE)

nlu_engine.fit(sample_dataset)
nlu_engine.persist("models/current")
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

engine = SnipsNLUEngine(config=CONFIG_EN)

with io.open("chess_speech.json") as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("./chess_engine")