def train_snips_nlu(train, version, config=DEFAULT_SNIPS_CONFIG):
    from snips_nlu import load_resources
    from de.core.snips_custom_units.de_nlu_engine import DESnipsNLUEngine

    interpreter = DESnipsNLUEngine(config)
    snips_intents = {}
    snips_entities = {}
    training_data = {'language': 'en',
                     'intents': snips_intents,
                     'entities': snips_entities}

    intents = train['intent'].unique().tolist()
    train = dict(tuple(train.groupby('intent')))
    for intent in intents:
        utterances = []
        snips_intent = {'utterances': utterances}
        snips_intents[intent] = snips_intent
        intent_phrases = train[intent]['utterance']
        for phrase in intent_phrases:
            items = [{'text': phrase}]
            utterance = {'data': items}
            utterances.append(utterance)

    load_resources("en")
    print('Training model...')
    interpreter.fit(training_data)
    model = interpreter.to_dict()
    print('Saving model...')
    save_obj(model, 'models/snips/benchmark/', version)
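# For reference, train_snips_nlu expects a two-column table of labeled
# utterances and converts it into the Snips training-data format. A minimal
# sketch of the input and of the dict the loop builds, assuming `train` is a
# pandas DataFrame (save_obj and DEFAULT_SNIPS_CONFIG are project-specific):
import pandas as pd

train = pd.DataFrame({
    'intent': ['greet', 'greet', 'bye'],  # hypothetical intents
    'utterance': ['hello there', 'hi', 'see you later'],
})

# The loop above produces the equivalent of:
training_data = {
    'language': 'en',
    'entities': {},
    'intents': {
        'greet': {'utterances': [
            {'data': [{'text': 'hello there'}]},
            {'data': [{'text': 'hi'}]},
        ]},
        'bye': {'utterances': [
            {'data': [{'text': 'see you later'}]},
        ]},
    },
}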
def cross_val_metrics(dataset_path, output_path, nb_folds=5,
                      train_size_ratio=1.0, exclude_slot_metrics=False,
                      include_errors=False):
    def progression_handler(progress):
        print("%d%%" % int(progress * 100))

    metrics_args = dict(
        dataset=dataset_path,
        engine_class=SnipsNLUEngine,
        progression_handler=progression_handler,
        nb_folds=nb_folds,
        train_size_ratio=train_size_ratio,
        include_slot_metrics=not exclude_slot_metrics,
    )

    with Path(dataset_path).open("r", encoding="utf8") as f:
        load_resources(json.load(f)["language"])

    from snips_nlu_metrics import compute_cross_val_metrics
    metrics = compute_cross_val_metrics(**metrics_args)

    if not include_errors:
        metrics.pop("parsing_errors")

    with Path(output_path).open(mode="w", encoding="utf8") as f:
        f.write(json_string(metrics))
def EngineMode(self, mode):
    """
    Train and keep the engine in memory ("Train"), or reuse a previously
    persisted model ("Test").

    :param mode: "Train" or "Test"
    :return: fitted engine
    """
    if mode == "Train":
        load_resources('snips_nlu_en')
        self.__engine = SnipsNLUEngine(config=CONFIG_EN)
        with io.open("dataset.json") as f:
            self.__dataset = json.load(f)
        self.__engine.fit(self.__dataset)
        # Uncomment to save the model (raw string so the backslashes in the
        # Windows path are not treated as escape sequences):
        # self.__engine.persist(r"Z:\FCIS-ASU\Semester 8\ChatbotModel")
    elif mode == "Test":
        with io.open("dataset.json") as f:
            self.__dataset = json.load(f)
        self.__engine = SnipsNLUEngine.from_path(
            r"Z:\FCIS-ASU\Semester 8\ChatbotModel")
def main_cross_val_metrics():
    args = vars(parse_cross_val_args(sys.argv[1:]))

    dataset_path = args.pop("dataset_path")
    output_path = args.pop("output_path")

    def progression_handler(progress):
        print("%d%%" % int(progress * 100))

    metrics_args = dict(dataset=dataset_path,
                        engine_class=SnipsNLUEngine,
                        progression_handler=progression_handler)
    if args.get("nb_folds") is not None:
        nb_folds = args.pop("nb_folds")
        metrics_args.update(dict(nb_folds=nb_folds))
    if args.get("train_size_ratio") is not None:
        train_size_ratio = args.pop("train_size_ratio")
        metrics_args.update(dict(train_size_ratio=train_size_ratio))
    include_errors = args.get("include_errors", False)

    with io.open(dataset_path, "r", encoding="utf-8") as f:
        load_resources(json.load(f)["language"])

    metrics = compute_cross_val_metrics(**metrics_args)
    if not include_errors:
        metrics.pop("parsing_errors")

    with io.open(output_path, mode="w") as f:
        f.write(bytes(json.dumps(metrics), encoding="utf8").decode("utf8"))
def main_train_test_metrics():
    args = vars(parse_train_test_args(sys.argv[1:]))

    train_dataset_path = args.pop("train_dataset_path")
    test_dataset_path = args.pop("test_dataset_path")
    output_path = args.pop("output_path")
    exclude_slot_metrics = args.get("exclude_slot_metrics", False)

    metrics_args = dict(
        train_dataset=train_dataset_path,
        test_dataset=test_dataset_path,
        engine_class=SnipsNLUEngine,
        include_slot_metrics=not exclude_slot_metrics
    )
    include_errors = args.get("include_errors", False)

    with io.open(train_dataset_path, "r", encoding="utf-8") as f:
        load_resources(json.load(f)["language"])

    metrics = compute_train_test_metrics(**metrics_args)
    if not include_errors:
        metrics.pop("parsing_errors")

    with io.open(output_path, mode="w") as f:
        f.write(bytes(json.dumps(metrics), encoding="utf8").decode("utf8"))
def train_test_metrics(train_dataset_path, test_dataset_path, output_path,
                       config_path=None, exclude_slot_metrics=False,
                       include_errors=False, verbose=False):
    if verbose:
        set_nlu_logger(logging.DEBUG)

    if config_path is not None:
        with Path(config_path).open("r", encoding="utf-8") as f:
            config = json.load(f)
        engine_cls = make_engine_cls(config)
    else:
        engine_cls = SnipsNLUEngine

    metrics_args = dict(train_dataset=train_dataset_path,
                        test_dataset=test_dataset_path,
                        engine_class=engine_cls,
                        include_slot_metrics=not exclude_slot_metrics)

    with Path(train_dataset_path).open("r", encoding="utf8") as f:
        load_resources(json.load(f)["language"])

    from snips_nlu_metrics import compute_train_test_metrics
    metrics = compute_train_test_metrics(**metrics_args)

    if not include_errors:
        metrics.pop("parsing_errors")

    with Path(output_path).open(mode="w", encoding="utf8") as f:
        f.write(json_string(metrics))
def main_train_test_metrics():
    args = vars(parse_train_test_args(sys.argv[1:]))

    train_dataset_path = args.pop("train_dataset_path")
    test_dataset_path = args.pop("test_dataset_path")
    output_path = args.pop("output_path")
    exclude_slot_metrics = args.get("exclude_slot_metrics", False)

    metrics_args = dict(
        train_dataset=train_dataset_path,
        test_dataset=test_dataset_path,
        engine_class=SnipsNLUEngine,
        include_slot_metrics=not exclude_slot_metrics
    )
    include_errors = args.get("include_errors", False)

    with io.open(train_dataset_path, "r", encoding="utf-8") as f:
        load_resources(json.load(f)["language"])

    from snips_nlu_metrics import compute_train_test_metrics
    metrics = compute_train_test_metrics(**metrics_args)

    if not include_errors:
        metrics.pop("parsing_errors")

    with io.open(output_path, mode="w") as f:
        json_dump = json.dumps(metrics, sort_keys=True, indent=2)
        f.write(bytes(json_dump, encoding="utf8").decode("utf8"))
def nluparse(text):
    load_resources(sample_dataset["language"])
    nlu_engine = SnipsNLUEngine(config=config)
    nlu_engine.fit(sample_dataset)

    # e.g. text = "Show me jobs in LA for today"
    parsing = nlu_engine.parse(text)
    return json.dumps(parsing, indent=2)
def train_engine(self):
    """Set up the Snips NLU engine and fit it on the training data."""
    load_resources("en")
    self.nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    self.nlu_engine.fit(self.sample_dataset)
def parse(event, content):
    s3 = boto3.resource('s3')
    obj = s3.Object(os.environ.get('RESOURCE_BUCKET'), 'trained_engine.json')
    load_resources('en')
    trained_model = json.load(obj.get().get('Body'))
    engine = SnipsNLUEngine.from_dict(trained_model)
    return engine.parse(event.get('statement'))
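# A hypothetical local invocation of the Lambda handler above; the bucket
# name is made up, and a real call needs AWS credentials plus a
# trained_engine.json object in that bucket:
import os
os.environ['RESOURCE_BUCKET'] = 'my-nlu-resources'  # hypothetical bucket
result = parse({'statement': 'turn on the kitchen lights'}, None)
print(result['intent']['intentName'])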
def __init__(self): load_resources(u"en") engine = SnipsNLUEngine(config=CONFIG_EN) with io.open(SnipHandler.dataset_path) as f: data_set = json.load(f) self.engine = engine.fit(data_set) SnipHandler.__instance__ = self
def __init__(self, lang, trained_engine_file):
    self.__lang = lang
    self.__trained_engine_file = trained_engine_file
    load_resources(self.__lang)
    with io.open(self.__trained_engine_file) as f:
        engine_dict = json.load(f)
    self.__loaded_engine = SnipsNLUEngine.from_dict(engine_dict)
def __init__(self, database_context, cos_context):
    # Cloudant DB
    self.context = database_context
    self.training_data = {}
    self.cos_context = cos_context
    self._check_trainer_dir(ENGINE_PATH_ZIP)
    load_resources("de")
    load_resources("en")
    self.nlu_engine = SnipsNLUEngine()
def __init__(self, lang, config_file):
    self.__lang = lang
    self.__config_file = config_file
    self.__ongoing_training = False
    load_resources(self.__lang)
    with io.open(self.__config_file) as f:
        self.__config = json.load(f)
    self.__engine = SnipsNLUEngine(config=self.__config)
def __init__(self, classes, model_path=None, max_api_calls=None,
             verbose=False):
    super().__init__(classes, max_api_calls, verbose)
    load_resources('en')
    if model_path:
        self.load_model(model_path)
    else:
        self.engine = SnipsNLUEngine(config=CONFIG_EN)
def load_data():
    SAMPLE_DATASET_PATH = Path(__file__).parent / "dataset.json"
    with SAMPLE_DATASET_PATH.open() as f:
        sample_dataset = json.load(f)
    load_resources("snips_nlu_en")
    load_data.nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    load_data.nlu_engine.fit(sample_dataset)
    load_data.nlp = spacy.load('en_core_web_sm')
    # print(nlu_engine)
    print("resources loaded...")
def conversacion(m):
    with io.open("training.json") as f:
        sample_dataset = json.load(f)
    load_resources("en")
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine.fit(sample_dataset)
    text = u"" + m.text.lower()
    listaResultado = nlu_engine.parse(text)
    return procesarRespuesta(listaResultado)
def debug_inference(engine_path):
    with Path(engine_path).open("r", encoding="utf8") as f:
        engine_dict = json.load(f)
    load_resources(engine_dict["dataset_metadata"]["language_code"])
    engine = SnipsNLUEngine.from_dict(engine_dict)

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if isinstance(query, bytes):
            query = query.decode("utf8")
        if query == "q":
            break
        print(json.dumps(engine.parse(query), indent=2))
def debug_inference(engine_path):
    with io.open(os.path.abspath(engine_path), "r", encoding="utf8") as f:
        engine_dict = json.load(f)
    load_resources(engine_dict["dataset_metadata"]["language_code"])
    engine = SnipsNLUEngine.from_dict(engine_dict)

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if isinstance(query, bytes):
            query = query.decode("utf8")
        if query == "q":
            break
        print(json.dumps(engine.parse(query), indent=2))
def init_snipsnlu():
    # engine = SnipsNLUEngine(config=CONFIG_EN)
    engine = SnipsNLUEngine(resources=load_resources("snips_nlu_en"))
    with io.open("proj.json") as f:
        dataset = json.load(f)
    engine.fit(dataset)
    return engine
def runEngine(query):
    with io.open("dataset.json") as f:
        dataset = json.load(f)
    load_resources("en")
    # with io.open("config_en.json") as f:
    #     config = json.load(f)
    # engine = SnipsNLUEngine(config=config)
    engine = SnipsNLUEngine(config=CONFIG_EN)
    engine.fit(dataset)
    parsing = engine.parse(query)
    return json.dumps(parsing, indent=2)
def test_should_not_load_resources_when_provided(
        self, mocked_load_resources):
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    resources = load_resources("en")

    # When
    engine = SnipsNLUEngine(resources=resources)
    engine.fit(dataset)

    # Then
    mocked_load_resources.assert_not_called()
def test_should_not_build_custom_parser_when_provided(self):
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    resources = load_resources("en")
    custom_entity_parser = CustomEntityParser.build(
        dataset, CustomEntityParserUsage.WITH_AND_WITHOUT_STEMS, resources)

    # When
    with patch("snips_nlu.entity_parser.custom_entity_parser"
               ".CustomEntityParser.build") as mocked_build_parser:
        engine = SnipsNLUEngine(custom_entity_parser=custom_entity_parser)
        engine.fit(dataset)

    # Then
    mocked_build_parser.assert_not_called()
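# Both tests above rely on the same idea: language resources can be loaded
# once with load_resources("en") and injected into each engine, so fitting
# does not trigger another load. A minimal sketch of that reuse pattern,
# assuming the 0.20-style API where SnipsNLUEngine accepts a `resources`
# argument (as in the tests):
from snips_nlu import SnipsNLUEngine, load_resources

shared_resources = load_resources("en")  # loaded a single time
engine_a = SnipsNLUEngine(resources=shared_resources)
engine_b = SnipsNLUEngine(resources=shared_resources)  # no reload needed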
def __init__(self):
    print('Load NLU Engine')
    print('-----------------------------------------------------------------')
    try:
        with io.open("oms_dataset.json") as f:
            dataset = json.load(f)
    except IOError as e:
        # Catch the specific error and fill in the format placeholders,
        # which the original bare except left empty.
        print('I/O error({0}): {1}'.format(e.errno, e.strerror))
        sys.exit()
    load_resources('snips_nlu_en')
    self.__nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    self.__nlu_engine.fit(dataset)
    self.__nlu_engine.to_byte_array()
def load_data():
    SAMPLE_DATASET_PATH = Path(__file__).parent / "dataset.json"
    with SAMPLE_DATASET_PATH.open() as f:
        sample_dataset = json.load(f)
    load_resources("snips_nlu_en")
    load_data.nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    load_data.nlu_engine.fit(sample_dataset)
    load_data.nlp = spacy.load('en_core_web_sm')
    # load_data.nlp2 = spacy.load('en_core_web_md', parser=False)
    # Raw string so the backslashes in the Windows path are not treated as
    # escape sequences.
    load_data.model = KeyedVectors.load_word2vec_format(
        r'C:\demo\google vectors\GoogleNews-vectors-negative300.bin',
        binary=True)
    # print(nlu_engine)
    print("resources loaded...")
def __init__(self):
    # load language resources
    load_resources(u"en")
    # create NLU Engine
    self.engine = SnipsNLUEngine(config=CONFIG_EN)
    # train engine
    """
    with io.open("dataset.json") as f:
        dataset = json.load(f)
    print('start training')
    self.engine.fit(dataset=dataset)
    print('finished training')
    self.engine.persist('nlu_engine')
    """
    self.engine = SnipsNLUEngine.from_path('nlu_engine')
    print('snips engine ready')
def train(dataset_path, output_path, config_path):
    """Train an NLU engine on the provided dataset"""
    with Path(dataset_path).open("r", encoding="utf8") as f:
        dataset = json.load(f)

    config = None
    if config_path is not None:
        with Path(config_path).open("r", encoding="utf8") as f:
            config = json.load(f)

    load_resources(dataset["language"])
    # Announce the training before fitting (the original printed it after).
    print("Create and train the engine...")
    engine = SnipsNLUEngine(config).fit(dataset)

    serialized_engine = bytes(json.dumps(engine.to_dict()), encoding="utf8")
    with Path(output_path).open("w", encoding="utf8") as f:
        f.write(serialized_engine.decode("utf8"))
    print("Saved the trained engine to %s" % output_path)
def parse(training_path, query):
    """Load a trained NLU engine and play with its parsing API interactively"""
    training_path = Path(training_path)
    with training_path.open("r", encoding="utf8") as f:
        engine_dict = json.load(f)
    language = engine_dict["dataset_metadata"]["language_code"]
    load_resources(language)
    engine = SnipsNLUEngine.from_dict(engine_dict)

    if query:
        print_parsing_result(engine, query)
        return

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if query == "q":
            break
        print_parsing_result(engine, query)
def train(dataset_path, output_path, config_path):
    """Train an NLU engine on the provided dataset"""
    with Path(dataset_path).open("r", encoding="utf8") as f:
        dataset = json.load(f)

    config = None
    if config_path is not None:
        with Path(config_path).open("r", encoding="utf8") as f:
            config = json.load(f)

    load_resources(dataset["language"])
    print("Create and train the engine...")
    engine = SnipsNLUEngine(config).fit(dataset)

    print("Persisting the engine...")
    engine.persist(output_path)
    print("Saved the trained engine to %s" % output_path)
def main_engine_inference():
    args = vars(parse_inference_args(sys.argv[1:]))
    training_path = args.pop("training_path")
    with io.open(os.path.abspath(training_path), "r", encoding="utf8") as f:
        engine_dict = json.load(f)
    engine = SnipsNLUEngine.from_dict(engine_dict)
    language = engine._dataset_metadata[  # pylint: disable=protected-access
        "language_code"]
    load_resources(language)

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if isinstance(query, bytes):
            query = query.decode("utf8")
        if query == "q":
            break
        print(json.dumps(engine.parse(query), indent=2))
def fit(self, data):
    data_lang = data.get('language')
    if data_lang != self.lang:
        self._logger.warning(
            'Training language "%s" and interpreter language "%s" do not '
            'match, things could go badly' % (data_lang, self.lang))

    self._logger.info('Fitting using "snips v%s"' % __version__)

    checksum = compute_checksum(data)
    cached_checksum = None

    # Try to load the cached checksum
    if self.cache_directory:
        cached_checksum_path = os.path.join(self.cache_directory,
                                            'trained.checksum')
        cached_checksum = read_file(cached_checksum_path, ignore_errors=True)

    if not cached_checksum:
        self._logger.debug('Checksum file not found')

    if checksum == cached_checksum:
        self.load_from_cache()
    else:
        config = None
        try:
            self._logger.info(
                'Importing default configuration for language "%s"' %
                self.lang)
            config = getattr(snips_confs, 'CONFIG_%s' % self.lang.upper())
        except AttributeError:
            self._logger.warning(
                'Could not import default configuration, it will use the '
                'generic one instead')

        self._engine = SnipsNLUEngine(
            config, resources=load_resources('snips_nlu_%s' % self.lang))
        self._engine.fit(data)

        if self.cache_directory:
            self._logger.info('Persisting trained engine to "%s"' %
                              self.cache_directory)
            rmtree(self.cache_directory, ignore_errors=True)
            self._engine.persist(self.cache_directory)
            with open(cached_checksum_path, mode='w') as f:
                f.write(checksum)

    self._configure()
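# compute_checksum and read_file are helpers from the surrounding project,
# not shown here. A minimal sketch of what they plausibly do (a stable hash
# of the serialized training data, and a read that tolerates a missing
# file) -- not the project's actual implementation:
import hashlib
import json

def compute_checksum(data):
    """Stable SHA-256 checksum over the JSON-serialized training data."""
    serialized = json.dumps(data, sort_keys=True).encode('utf-8')
    return hashlib.sha256(serialized).hexdigest()

def read_file(path, ignore_errors=False):
    """Return the file's contents, or None if missing and errors are ignored."""
    try:
        with open(path) as f:
            return f.read()
    except IOError:
        if ignore_errors:
            return None
        raise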
def fit(self, training_file_path, trained_directory_path):
    filename, _ = os.path.splitext(os.path.basename(training_file_path))

    # TODO check what should be in the base Interpreter class
    trained_path = os.path.join(trained_directory_path,
                                '%s.trained.json' % filename)
    checksum_path = os.path.join(trained_directory_path,
                                 '%s.checksum' % filename)

    with open(training_file_path) as f:
        training_str = f.read()

    self._training_data = json.loads(training_str)
    self._lang = self._training_data['language']

    self._log.info('Loading resources for language %s' % self._lang)
    load_resources(self._lang)

    same, computed_checksum = self.checksum_match(training_str, checksum_path)

    # Checksums match, load the engine from the trained file
    if same and os.path.isfile(trained_path):
        self._log.info('Checksum matched, loading trained engine')
        with open(trained_path) as f:
            self._engine = SnipsNLUEngine.from_dict(json.load(f))
    else:
        self._log.info('Checksum has changed, retraining the engine')
        self._engine = SnipsNLUEngine()
        self._engine.fit(self._training_data)

        with open(trained_path, mode='w') as f:
            json.dump(self._engine.to_dict(), f)

        with open(checksum_path, mode='w') as f:
            f.write(computed_checksum)

    self._entity_parser = BuiltinEntityParser(self._lang)
    self._meta = {
        k: list(v.keys())
        for k, v in
        self._engine._dataset_metadata['slot_name_mappings'].items()
    }
def debug_training(dataset_path, config_path=None):
    with io.open(os.path.abspath(dataset_path), "r", encoding="utf8") as f:
        dataset = json.load(f)
    load_resources(dataset["language"])

    if config_path is None:
        config = NLUEngineConfig()
    else:
        with io.open(config_path, "r", encoding="utf8") as f:
            config = NLUEngineConfig.from_dict(json.load(f))

    engine = SnipsNLUEngine(config).fit(dataset)

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if isinstance(query, bytes):
            query = query.decode("utf8")
        if query == "q":
            break
        print(json.dumps(engine.parse(query), indent=2))
def main_train_engine():
    args = vars(parse_train_args(sys.argv[1:]))
    dataset_path = args.pop("dataset_path")
    with io.open(dataset_path, "r", encoding="utf8") as f:
        dataset = json.load(f)

    if args.get("config_path") is not None:
        config_path = args.pop("config_path")
        with io.open(config_path, "r", encoding="utf8") as f:
            config = json.load(f)
    else:
        config = NLUEngineConfig()

    load_resources(dataset["language"])
    # Announce the training before fitting (the original printed it after).
    print("Create and train the engine...")
    engine = SnipsNLUEngine(config).fit(dataset)

    output_path = args.pop("output_path")
    serialized_engine = bytes(json.dumps(engine.to_dict()), encoding="utf8")
    with io.open(output_path, "w", encoding="utf8") as f:
        f.write(serialized_engine.decode("utf8"))
    print("Saved the trained engine to %s" % output_path)
def main_train_test_metrics():
    args = vars(parse_train_test_args(sys.argv[1:]))

    train_dataset_path = args.pop("train_dataset_path")
    test_dataset_path = args.pop("test_dataset_path")
    output_path = args.pop("output_path")

    metrics_args = dict(
        train_dataset=train_dataset_path,
        test_dataset=test_dataset_path,
        engine_class=SnipsNLUEngine
    )
    include_errors = args.get("include_errors", False)

    with io.open(train_dataset_path, "r", encoding="utf-8") as f:
        load_resources(json.load(f)["language"])

    metrics = compute_train_test_metrics(**metrics_args)
    if not include_errors:
        metrics.pop("parsing_errors")

    with io.open(output_path, mode="w") as f:
        f.write(bytes(json.dumps(metrics), encoding="utf8").decode("utf8"))
from __future__ import unicode_literals, print_function

import io
import json

from snips_nlu import SnipsNLUEngine, load_resources

with io.open("sample_dataset.json") as f:
    sample_dataset = json.load(f)

with io.open("configs/config_en.json") as f:
    config = json.load(f)

load_resources(sample_dataset["language"])
nlu_engine = SnipsNLUEngine(config=config)
nlu_engine.fit(sample_dataset)

text = "What will be the weather in San Francisco next week?"
parsing = nlu_engine.parse(text)
print(json.dumps(parsing, indent=2))
from __future__ import unicode_literals, print_function

import io
import json
from os.path import dirname, abspath, join

from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.default_configs import CONFIG_EN

SAMPLE_DATASET_PATH = join(dirname(abspath(__file__)), "sample_dataset.json")

with io.open(SAMPLE_DATASET_PATH) as f:
    sample_dataset = json.load(f)

load_resources("en")
nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
nlu_engine.fit(sample_dataset)

text = "What will be the weather in San Francisco next week?"
parsing = nlu_engine.parse(text)
print(json.dumps(parsing, indent=2))
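# For orientation, `parsing` is a plain dict in the standard Snips NLU result
# format. A hypothetical result for the query above -- the intent and slot
# names depend entirely on the dataset, and the numbers are illustrative:
parsing_example = {
    "input": "What will be the weather in San Francisco next week?",
    "intent": {
        "intentName": "searchWeatherForecast",  # hypothetical intent name
        "probability": 0.77,
    },
    "slots": [{
        "range": {"start": 28, "end": 41},
        "rawValue": "San Francisco",
        "value": {"kind": "Custom", "value": "San Francisco"},
        "entity": "location",           # hypothetical entity name
        "slotName": "weatherLocation",  # hypothetical slot name
    }],
}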