def fit(self, data: dict) -> None:
    super().fit(data)

    data_lang = data.get('language')
    if data_lang != self.lang:  # pragma: no cover
        self._logger.warning(
            'Training language "%s" and interpreter language "%s" do not match, '
            'things could go badly', data_lang, self.lang)

    self._logger.info('Fitting using "snips v%s"', __version__)
    checksum = compute_checksum(data)
    cached_checksum = None

    # Try to load the cached checksum
    if self.cache_directory:
        cached_checksum_path = os.path.join(self.cache_directory,
                                            'trained.checksum')
        cached_checksum = read_file(cached_checksum_path, ignore_errors=True)

    if not cached_checksum:
        self._logger.debug('Checksum file not found')

    if checksum == cached_checksum:
        self.load_from_cache()
    else:
        config = None
        try:
            self._logger.info(
                'Importing default configuration for language "%s"',
                self.lang)
            config = getattr(snips_confs, 'CONFIG_%s' % self.lang.upper())
        except AttributeError:
            self._logger.warning(
                'Could not import default configuration, falling back to '
                'the generic one')

        resource_pkg_name = self._check_and_install_resources_package()
        self._engine = SnipsNLUEngine(
            config, resources=load_resources(resource_pkg_name))
        self._engine.fit(data)

        if self.cache_directory:  # pragma: no cover
            self._logger.info('Persisting trained engine to "%s"',
                              self.cache_directory)
            # Make sure the cache directory has been cleaned out
            rmtree(self.cache_directory, ignore_errors=True)
            self._engine.persist(self.cache_directory)

            with open(cached_checksum_path, mode='w') as file:
                file.write(checksum)

    self._configure()

def get_nlu_engine(dataset_json_path: str) -> SnipsNLUEngine:
    with io.open(dataset_json_path) as f:
        dataset = json.load(f)
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine = nlu_engine.fit(dataset)
    return nlu_engine

def EngineMode(self, mode):
    """
    Train the engine and optionally persist the model ("Train"), or load an
    already persisted model from disk ("Test").

    :param mode: "Train" or "Test"
    :return: None; the fitted engine is stored in self.__engine
    """
    if mode == "Train":
        load_resources('snips_nlu_en')
        self.__engine = SnipsNLUEngine(config=CONFIG_EN)
        with io.open("dataset.json") as f:
            self.__dataset = json.load(f)
        self.__engine.fit(self.__dataset)
        # Uncomment to save the model (raw string avoids invalid escapes
        # such as "\F" in the Windows path)
        # self.__engine.persist(r"Z:\FCIS-ASU\Semester 8\ChatbotModel")
    elif mode == "Test":
        with io.open("dataset.json") as f:
            self.__dataset = json.load(f)
        self.__engine = SnipsNLUEngine.from_path(
            r"Z:\FCIS-ASU\Semester 8\ChatbotModel")

def create_nlp_model() -> SnipsNLUEngine:
    """
    Trains a new ML model from the given dataset, then saves it in the root
    directory of the project under the directory name "nlumodel".

    This function is only called once, at the start of the program, if the
    nlumodel directory is not detected in the current directory.

    Parameters required: None
    Return data: trained SnipsNLUEngine object
    """
    # Creating a barebones engine
    engine = SnipsNLUEngine(config=CONFIG_EN)
    # Creating a dataset from the yaml files present in the nlputrain directory
    data = dataset.Dataset.from_yaml_files(
        "en",
        ["./nlputrain/" + i for i in os.listdir("./nlputrain/") if ".yaml" in i]
    )
    # Training the engine with the given dataset
    engine.fit(data)
    # Persisting the engine so it can be reloaded later; it is saved in the
    # nlumodel directory
    try:
        engine.persist("nlumodel")
    except PersistingError:
        print("Old NLP model files still exist. Deleting..")
        # Removing old model files using shutil
        shutil.rmtree("nlumodel")
        engine.persist("nlumodel")
    print("NLP model has been created and saved in directory: nlumodel")
    # Returning the trained engine
    return engine

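# A minimal usage sketch (not part of the original function): once the model
# has been persisted to "nlumodel", later runs can reload it with from_path()
# instead of retraining. The query string here is an illustrative assumption.
from snips_nlu import SnipsNLUEngine

engine = SnipsNLUEngine.from_path("nlumodel")
parsing = engine.parse("Turn on the lights in the kitchen")
print(parsing["intent"]["intentName"])
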
def init_snipsnlu():
    # engine = SnipsNLUEngine(config=CONFIG_EN)
    engine = SnipsNLUEngine(resources=load_resources("snips_nlu_en"))
    with io.open("proj.json") as f:
        dataset = json.load(f)
    engine.fit(dataset)
    return engine

def __init__(self):
    SAMPLE_DATASET_PATH = Path(__file__).parent / "dataset.json"
    with SAMPLE_DATASET_PATH.open(encoding="utf8") as f:
        sample_dataset = json.load(f)
    self.nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    self.nlu_engine.fit(sample_dataset)
    self.threshold = 93

def __init__(self):
    super(pluginmanager.IPlugin, self).__init__()
    self._plugins = {}
    # Skeleton of a Snips training dataset, filled in as plugins register
    self._pre_train_json = dict()
    self._pre_train_json['intents'] = {}
    self._pre_train_json['entities'] = {}
    self._pre_train_json['language'] = 'en'
    self.nlu_engine = SnipsNLUEngine()

def train_engine(self):
    """
    Set up the Snips NLU engine and fit it on the training data.

    :return: None
    """
    load_resources("en")
    self.nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    self.nlu_engine.fit(self.sample_dataset)

def nluparse(text):
    load_resources(sample_dataset["language"])
    nlu_engine = SnipsNLUEngine(config=config)
    nlu_engine.fit(sample_dataset)

    # text = "Show me jobs in LA for today"
    parsing = nlu_engine.parse(text)
    return json.dumps(parsing, indent=2)

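# nluparse() above depends on module-level globals; a minimal sketch of how
# they might be defined (the dataset file name is an assumption, not from the
# original module):
import io
import json
from snips_nlu.default_configs import CONFIG_EN

config = CONFIG_EN
with io.open("sample_dataset.json") as f:
    sample_dataset = json.load(f)
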
def train_model(model_file_path):
    global nlu_engine
    print("reading model at {}".format(model_file_path))
    with io.open(model_file_path) as f:
        model = json.load(f)
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    print("training model")
    nlu_engine.fit(model)

def get_empty_engine(self):
    """Create an empty Snips engine configured for a specific language."""
    assert self.snips_language in DEFAULT_CONFIGS, \
        f"Snips language not supported: {self.snips_language}"
    _LOGGER.debug("Creating empty Snips engine (language=%s)",
                  self.snips_language)
    return SnipsNLUEngine(config=DEFAULT_CONFIGS[self.snips_language])

def __init__(self, input_object, output_object, engine_param=None):
    super(DefaultEngine, self).__init__(input_object, output_object,
                                        engine_param)
    self.engine_name = "default_engine"
    logger.info("Initializing the engine..")
    self.engine = SnipsNLUEngine()
    # get the path of the dataset
    dataset_path = engine_param.get("dataset_path")
    self.train_model(dataset_path)

def __init__(self):
    load_resources(u"en")
    engine = SnipsNLUEngine(config=CONFIG_EN)
    with io.open(SnipHandler.dataset_path) as f:
        data_set = json.load(f)
    self.engine = engine.fit(data_set)
    # Register this instance as the singleton
    SnipHandler.__instance__ = self

class Nlu:
    nlu_engine = SnipsNLUEngine(config=CONFIG_FR)

    def __init__(self, fileNlu):
        with io.open(fileNlu) as f:
            sample_dataset = json.load(f)
        self.nlu_engine = self.nlu_engine.fit(sample_dataset)

    def parse(self, text):
        return self.nlu_engine.parse(text)

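# Usage sketch for the Nlu class above (the dataset file name and the
# utterance are illustrative assumptions, not from the original module):
nlu = Nlu("dataset_fr.json")
print(nlu.parse("allume la lumière dans la cuisine"))
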
def __init__(self, database_context, cos_context):
    # Cloudant DB
    self.context = database_context
    self.training_data = {}
    self.cos_context = cos_context
    self._check_trainer_dir(ENGINE_PATH_ZIP)
    load_resources("de")
    load_resources("en")
    self.nlu_engine = SnipsNLUEngine()

def fit(self, data):
    data_lang = data.get('language')
    if data_lang != self.lang:
        self._logger.warning(
            'Training language "%s" and interpreter language "%s" do not '
            'match, things could go badly' % (data_lang, self.lang))

    self._logger.info('Fitting using "snips v%s"' % __version__)
    checksum = compute_checksum(data)
    cached_checksum = None

    # Try to load the cached checksum
    if self.cache_directory:
        cached_checksum_path = os.path.join(self.cache_directory,
                                            'trained.checksum')
        cached_checksum = read_file(cached_checksum_path, ignore_errors=True)

    if not cached_checksum:
        self._logger.debug('Checksum file not found')

    if checksum == cached_checksum:
        self.load_from_cache()
    else:
        config = None
        try:
            self._logger.info(
                'Importing default configuration for language "%s"' %
                self.lang)
            config = getattr(snips_confs, 'CONFIG_%s' % self.lang.upper())
        except AttributeError:
            self._logger.warning(
                'Could not import default configuration, falling back to '
                'the generic one')

        self._engine = SnipsNLUEngine(
            config, resources=load_resources('snips_nlu_%s' % self.lang))
        self._engine.fit(data)

        if self.cache_directory:
            self._logger.info('Persisting trained engine to "%s"' %
                              self.cache_directory)
            rmtree(self.cache_directory, ignore_errors=True)
            self._engine.persist(self.cache_directory)
            with open(cached_checksum_path, mode='w') as f:
                f.write(checksum)

    self._configure()

def loadntrain(self, rootpath='./datasets/*.json'):
    paths = sorted(glob.glob(rootpath))
    self.models = []
    for i, dset in enumerate(paths):
        with io.open(dset) as f:
            dataset = json.load(f)
        model = SnipsNLUEngine(config=CONFIG_EN)
        model = model.fit(dataset)
        self.models.append(model)
        print(f"{i+1}. Trained for {dset}")
    print(f"Training for {len(paths)} datasets completed")

def __init__(self, lang, config_file):
    self.__lang = lang
    self.__config_file = config_file
    self.__ongoing_training = False
    load_resources(self.__lang)
    with io.open(self.__config_file) as f:
        self.__config = json.load(f)
    self.__engine = SnipsNLUEngine(config=self.__config)

def __init__(self, classes, model_path=None, max_api_calls=None,
             verbose=False):
    super().__init__(classes, max_api_calls, verbose)
    load_resources('en')
    if model_path:
        self.load_model(model_path)
    else:
        self.engine = SnipsNLUEngine(config=CONFIG_EN)

def load_data():
    SAMPLE_DATASET_PATH = Path(__file__).parent / "dataset.json"
    with SAMPLE_DATASET_PATH.open() as f:
        sample_dataset = json.load(f)
    load_resources("snips_nlu_en")
    # The engine and spaCy model are stored as attributes on the function
    # object so other code can reach them after this runs
    load_data.nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    load_data.nlu_engine.fit(sample_dataset)
    load_data.nlp = spacy.load('en_core_web_sm')
    # print(nlu_engine)
    print("Resources loaded......")

def conversacion(m):
    with io.open("training.json") as f:
        sample_dataset = json.load(f)
    load_resources("en")
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine.fit(sample_dataset)
    text = m.text.lower()
    listaResultado = nlu_engine.parse(text)
    return procesarRespuesta(listaResultado)

def test_parse(self):
    # Given
    dataset_stream = io.StringIO(u"""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- brew [number_of_cups:snips/number](one) cup of coffee please
- make me [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    nlu_engine = SnipsNLUEngine().fit(dataset)
    nlu_engine.persist(self.tmp_file_path)

    # When
    output_target = io.StringIO()
    with self.fail_if_exception("Failed to parse using CLI script"):
        with redirect_stdout(output_target):
            parse(str(self.tmp_file_path), "Make me two cups of coffee")
    output = output_target.getvalue()

    # Then
    expected_output = """{
  "input": "Make me two cups of coffee",
  "intent": {
    "intentName": "MakeCoffee",
    "probability": 1.0
  },
  "slots": [
    {
      "entity": "snips/number",
      "range": {
        "end": 11,
        "start": 8
      },
      "rawValue": "two",
      "slotName": "number_of_cups",
      "value": {
        "kind": "Number",
        "value": 2.0
      }
    }
  ]
}
"""
    self.assertEqual(expected_output, output)

def train_eval_snips_nlu_model(lang='en', cross=False, save=''):
    """
    Train and evaluate a Snips NLU model on data built from brat annotation
    objects.

    :param lang: abbreviated language name
    :param cross: if True, compute evaluation metrics instead of persisting a model
    :param save: version tag appended to result file names and the model path
    :return: None
    :rtype: None
    """
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN
    from snips_nlu_metrics import compute_train_test_metrics, compute_cross_val_metrics
    import pickle
    import json

    if cross:
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_snips_data_task1()
        print("--> Evaluating training data with Snips metrics...")
        filename_results = source_result / "snips_semeval_2020_evaluation_task1_{}.pkl".format(save)
        # Only compute and write the metrics if no result file exists yet
        if not Path(filename_results).exists():
            tt_metrics = compute_train_test_metrics(
                train_dataset=train_data[0],
                test_dataset=train_data[1],
                engine_class=SnipsNLUEngine,
                include_slot_metrics=False)
            # print(tt_metrics)
            print("--> Writing snips nlu metrics data to file...")
            with codecs.open(filename_results, 'wb') as metric:
                pickle.dump(tt_metrics, metric)
            from datetime import datetime
            dmtime = "_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
            name = "snips_semeval_2020_evaluation_task1{}.json".format(dmtime)
            filename_results_json = source_result / name
            with codecs.open(filename_results_json, 'w', "utf-8") as m_json:
                json.dump(tt_metrics, m_json)
    else:
        filename_results = source_result / "snips_semeval_2020_model_task1_{}".format(save)
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_snips_data_task1()
        nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
        print("--> Training patent data with Snips...")
        nlu_engine.fit(train_data)
        try:
            print("--> Saving model trained with Snips (JOBLIB)...")
            filename_joblib = source_result / "snips_semeval_2020_model_task1_{}.pkl".format(save)
            with codecs.open(filename_joblib, 'wb') as metric:
                pickle.dump(nlu_engine, metric)
        except Exception:
            pass
        print("--> Saving model trained with Snips (SNIPS)...")
        try:
            nlu_engine.persist(filename_results)
        except Exception:
            pass

def train(dataset_file_path, train_directory):
    with io.open(dataset_file_path) as f:
        dataset = json.load(f)

    language = dataset.get("language", None)
    config = language_configs.get(language, None)
    if config is None:
        raise Exception(
            f"No language configuration for language {language}")

    nlu_engine = SnipsNLUEngine(config=config)
    nlu_engine.fit(dataset)
    nlu_engine.persist(train_directory)

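# train() above assumes a module-level language_configs mapping; a minimal
# sketch built from the default configs that ship with snips_nlu (the exact
# set of languages supported by the original module is an assumption):
from snips_nlu.default_configs import CONFIG_DE, CONFIG_EN, CONFIG_FR

language_configs = {
    "de": CONFIG_DE,
    "en": CONFIG_EN,
    "fr": CONFIG_FR,
}
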
def __init__(self, vosk_model_path, wakeword_detector, nlu_dataset, client,
             samplerate=16000):
    self.stt = vosk.KaldiRecognizer(vosk.Model(vosk_model_path), samplerate)
    self.client = client
    self.samplerate = samplerate
    self.listener = Listener(samplerate, self.on_noise)
    self.wakeword_detector = wakeword_detector
    self.nlu_engine = SnipsNLUEngine(config=CONFIG_FR)
    with open(nlu_dataset) as f:
        self.nlu_engine.fit(json.load(f))

def make_nlu_model_json(fname):
    # Dataset.from_yaml_files accepts file paths (or file objects) directly,
    # so the YAML documents in fname do not need to be pre-parsed
    dataset = Dataset.from_yaml_files("en", [fname])
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine = nlu_engine.fit(dataset.json)
    text = "Please turn the light on in the kitchen"
    parsing = nlu_engine.parse(text)
    return parsing

def test_default_configs_should_work(self):
    # Given
    dataset = deepcopy(WEATHER_DATASET)

    for language in get_all_languages():
        # When
        config = DEFAULT_CONFIGS.get(language)
        self.assertIsNotNone(config,
                             "Missing default config for '%s'" % language)
        dataset[LANGUAGE] = language
        engine = SnipsNLUEngine(config).fit(dataset)
        result = engine.parse("Please give me the weather in Paris")

        # Then
        intent_name = result[RES_INTENT][RES_INTENT_NAME]
        self.assertEqual("SearchWeatherForecast", intent_name)

def runEngine(query):
    with io.open("dataset.json") as f:
        dataset = json.load(f)
    load_resources("en")

    # with io.open("config_en.json") as f:
    #     config = json.load(f)
    # engine = SnipsNLUEngine(config=config)

    engine = SnipsNLUEngine(config=CONFIG_EN)
    engine.fit(dataset)
    parsing = engine.parse(query)
    return json.dumps(parsing, indent=2)

def main():
    """
    This builds a training dataset, trains an NLU engine with it, and saves
    that engine. This must be done any time a new command is added or
    utterances are edited for a command. If an NLU engine already exists,
    it is deleted.
    """
    training_json = json.loads(build_training_dataset())
    engine_path = os.path.join('Voithos', 'utilities', 'NLU')
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine = nlu_engine.fit(training_json)
    try:
        nlu_engine.persist(engine_path)
    except PersistingError:
        shutil.rmtree(engine_path)
        nlu_engine.persist(engine_path)

def __init__(self):
    print('Load NLU Engine')
    print('-----------------------------------------------------------------')
    try:
        with io.open("oms_dataset.json") as f:
            dataset = json.load(f)
    except IOError as e:
        print('I/O error({0}): {1}'.format(e.errno, e.strerror))
        sys.exit()
    load_resources('snips_nlu_en')
    self.__nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    self.__nlu_engine.fit(dataset)
    # Serialize the fitted engine to a bytearray (see the sketch below)
    self.__nlu_engine.to_byte_array()

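# A minimal sketch (not from the original class) of what the trailing
# to_byte_array() call enables: the bytearray it returns can be stored, for
# example in a database, and restored later without touching the filesystem.
# nlu_engine is assumed to be any fitted SnipsNLUEngine.
engine_bytes = nlu_engine.to_byte_array()
restored_engine = SnipsNLUEngine.from_byte_array(engine_bytes)
parsing = restored_engine.parse("Where is my order?")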