Example #1
def create_nlp_model() -> SnipsNLUEngine:
    """
    This function trains a new ML model from the given dataset. It then saves the model in the root directory of the project with the file name: nlpumodel
    This function will only be called once, at the start of the program, if nlumodel file is not detected in the current directory
    Parameters required: None
    Return data: Trained SnipsNLUEngine object
    """
    # Creating a barebones engine
    engine = SnipsNLUEngine(config=CONFIG_EN)

    # Creating dataset from yaml files present in nlputrain directory
    data = dataset.Dataset.from_yaml_files(
        "en", ["./nlputrain/" + i for i in os.listdir("./nlputrain/") if ".yaml" in i]
    )

    # Training the engine with given dataset
    engine.fit(data)

    # Persisting the engine so it can be used easily later
    # Persisting engine is saved in nlumodel folder
    try:
        engine.persist("nlumodel")
    except PersistingError:
        print("Old NLP file still exists. Deleting..")
        # Removing old model files using shutil
        shutil.rmtree("nlumodel")
        engine.persist("nlumodel")

    print("NLP model has been created and saved in directory: nlumodel")
    # Returning trained engine
    return engine
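A minimal sketch of the startup check described in the docstring: reuse the persisted model when the "nlumodel" directory already exists, and train only otherwise. SnipsNLUEngine.from_path is the loading counterpart of persist; the imports are assumed to be present at module level.

import os
from snips_nlu import SnipsNLUEngine

if os.path.isdir("nlumodel"):
    engine = SnipsNLUEngine.from_path("nlumodel")  # reuse the saved model
else:
    engine = create_nlp_model()  # train and persist once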
Example #2
def init_snipsnlu():
    # engine = SnipsNLUEngine(config=CONFIG_EN)
    engine = SnipsNLUEngine(resources=load_resources("snips_nlu_en"))
    with io.open("proj.json") as f:
        dataset = json.load(f)
        engine.fit(dataset)
    return engine
Example #3
class NLUTrainer:
    def __init__(self, lang, config_file):
        self.__lang = lang
        self.__config_file = config_file
        self.__ongoing_training = False

        load_resources(self.__lang)

        with io.open(self.__config_file) as f:
            self.__config = json.load(f)

        self.__engine = SnipsNLUEngine(config=self.__config)

    @property
    def ongoing_training(self):
        return self.__ongoing_training

    def train(self, json_dataset, trained_engine_file):
        self.__ongoing_training = True
        with io.open(json_dataset) as f:
            dataset = json.load(f)

        self.__engine.fit(dataset)

        engine_json = json.dumps(self.__engine.to_dict())
        with io.open(trained_engine_file, mode="w") as f:
            f.write(engine_json)

        self.__ongoing_training = False
        return
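A hedged usage sketch for NLUTrainer; the config and dataset file names below are placeholders.

# Hypothetical file names; the JSON config must match SnipsNLUEngine's config format.
trainer = NLUTrainer("en", "engine_config.json")
trainer.train("dataset.json", "trained_engine.json")
print(trainer.ongoing_training)  # False again once train() has returned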
Example #4
class SnipsService(ApiService):
    def __init__(self,
                 classes,
                 model_path=None,
                 max_api_calls=None,
                 verbose=False):
        super().__init__(classes, max_api_calls, verbose)
        load_resources('en')
        if model_path:
            self.load_model(model_path)
        else:
            self.engine = SnipsNLUEngine(config=CONFIG_EN)

    def train_model(self, dataset):
        self.engine.fit(dataset)

    def train_model_from_file(self, dataset_path):
        with io.open(dataset_path) as f:
            self.train_model(json.load(f))

    def save_model(self, model_path):
        self.engine.persist(model_path)

    def load_model(self, model_path):
        self.engine = SnipsNLUEngine.from_path(model_path)

    def predict(self, utterance):
        result = self.engine.parse(utterance)
        try:
            return result['intent']['intentName']
        except Exception as e:
            print('ERR:', e)
            print('Failed to parse: "{}"'.format(utterance))
            print(result)
            return None
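Hypothetical usage of SnipsService; the intent classes and file names are placeholders, and ApiService comes from the surrounding project.

service = SnipsService(classes=["turnLightOn", "turnLightOff"])
service.train_model_from_file("dataset.json")
service.save_model("snips_model")
print(service.predict("turn on the kitchen light"))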
Example #5
def train_model(model_file_path):
    global nlu_engine
    print("reading model at {}".format(model_file_path))
    with io.open(model_file_path) as f:
        model = json.load(f)
        nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
        print("training model")
        nlu_engine.fit(model)
Example #6
def nluparse(text):
    load_resources(sample_dataset["language"])
    nlu_engine = SnipsNLUEngine(config=config)
    nlu_engine.fit(sample_dataset)

    # text = "Show me jobs in LA for today"
    parsing = nlu_engine.parse(text)
    return json.dumps(parsing, indent=2)
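Note that nluparse retrains the engine on every call. A hedged refactor that trains once at module load instead (sample_dataset and config come from the surrounding module, as above):

# Train once at import time instead of on every parse call.
load_resources(sample_dataset["language"])
_engine = SnipsNLUEngine(config=config)
_engine.fit(sample_dataset)

def nluparse_cached(text):
    return json.dumps(_engine.parse(text), indent=2)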
Example #7
def conversacion(m):

    with io.open("training.json") as f:
        sample_dataset = json.load(f)

    load_resources("en")
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine.fit(sample_dataset)

    text = (u"" + m.text.lower() + "")
    listaResultado = nlu_engine.parse(text)
    return procesarRespuesta(listaResultado)
Example #8
def train_eval_snips_nlu_model(lang='en', cross=False, save=''):
    """ Train and evaluate a Snips model on all brat annotation objects.

    :param lang: abbreviated language name
    :param cross: if True, compute evaluation metrics instead of training and saving a model
    :param save: version suffix used in result and model file names
    :return: None
    :rtype: None
    """
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN
    from snips_nlu_metrics import compute_train_test_metrics, compute_cross_val_metrics
    import pickle
    import json

    if cross:
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_snips_data_task1()
        print("--> Evaluating training data with Snips metrics...")
        filename_results = source_result / "snips_semeval_2020_evaluation_task1_{}.pkl".format(save)
        if not Path(filename_results).exists():
            tt_metrics = compute_train_test_metrics(train_dataset=train_data[0],
                                                    test_dataset=train_data[1],
                                                    engine_class=SnipsNLUEngine,
                                                    include_slot_metrics=False)
            print("--> Writing snips nlu metrics data to file...")
            with codecs.open(filename_results, 'wb') as metric:
                pickle.dump(tt_metrics, metric)
            from datetime import datetime
            dmtime = "_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
            name = "snips_semeval_2020_evaluation_task1{}.json".format(dmtime)
            filename_results_json = source_result / name
            with codecs.open(filename_results_json, 'w', "utf-8") as m_json:
                json.dump(tt_metrics, m_json)

    else:
        filename_results = source_result / "snips_semeval_2020_model_task1_{}".format(save)
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_snips_data_task1()
        nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
        print("--> Training patent data with Snips...")
        nlu_engine.fit(train_data)
        try:
            print("--> Saving model trained with Snips (JOBLIB)...")
            filename_joblib = source_result / "snips_semeval_2020_model_task1_{}.pkl".format(save)
            with codecs.open(filename_joblib, 'wb') as metric:
                pickle.dump(nlu_engine, metric)
        except Exception:
            pass
        print("--> Saving model trained with Snips (SNIPS)...")
        try:
            nlu_engine.persist(filename_results)
        except Exception:
            pass
Example #9
def train(dataset_file_path, train_directory):
    with io.open(dataset_file_path) as f:
        dataset = json.load(f)

    language = dataset.get("language", None)
    config = language_configs.get(language, None)
    if config is None:
        raise Exception(
            f"No language configuration for language {language}")

    nlu_engine = SnipsNLUEngine(config=config)
    nlu_engine.fit(dataset)
    nlu_engine.persist(train_directory)
Example #10
def runEngine(query):
    with io.open("dataset.json") as f:
        dataset = json.load(f)

    load_resources("en")

    #with io.open("config_en.json") as f:
    #    config = json.load(f)

    #engine = SnipsNLUEngine(config=config)
    engine = SnipsNLUEngine(config=CONFIG_EN)

    engine.fit(dataset)

    parsing = engine.parse(query)
    return json.dumps(parsing, indent=2)
Example #11
def get_nlu_engine(dataset_json_path: str) -> SnipsNLUEngine:
    with io.open(dataset_json_path) as f:
        dataset = json.load(f)

    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine = nlu_engine.fit(dataset)
    return nlu_engine
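Since fit() returns the engine itself, the helper feeds straight into parsing; a hypothetical call site (the file name is a placeholder):

engine = get_nlu_engine("dataset.json")
print(engine.parse("turn the lights off")["intent"]["intentName"])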
Example #12
class NLU:
    def __init__(self):
        self.sample_dataset = NLU.load_dataset()

    @staticmethod
    def load_dataset():
        """
        Load the sample dataset which will be used to train the snipsnlu NLP
        engine.
        :return:
        """
        BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

        try:
            # with io.open(str(Path('app', 'static', 'samples', 'sample_dataset.json'))) as fr:
            #     sample_dataset = json.load(fr)
            # return sample_dataset
            with open(f'{BASE_DIR}/static/sample/test.json',
                      'r',
                      encoding='utf-8') as fr:
                sample_dataset = json.load(fr)
            return sample_dataset

        except Exception as e:
            print("Could not load dataset {}".format(str(e)))

    def train_engine(self):
        """
        # Setup the snipsnlu NLP engine and pass the training data.
        :return:
        """
        load_resources("en")
        self.nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
        self.nlu_engine.fit(self.sample_dataset)

    def parse_sentence(self, sentence):
        """
        Get the sentence and parse it to get the result.
        The sentence is a query made in any natural language(for now we are
        setting this language as english) and the result is the json string
        with the parsed help of trained engine and the possible correct
        prediction of what the query actually meant.
        :param sentence:
        :return:
        """
        parsing = self.nlu_engine.parse(sentence)
        return parsing
Example #13
    def __init__(self):
        load_resources(u"en")
        engine = SnipsNLUEngine(config=CONFIG_EN)
        with io.open(SnipHandler.dataset_path) as f:
            data_set = json.load(f)

        self.engine = engine.fit(data_set)

        SnipHandler.__instance__ = self
Example #14
def imprimirmensaje(message):
    campo = ""
    predicado = ""
    lista = []
    chatid = message.chat.id
    with io.open('dataset.json') as file:
        dataset = json.load(file)
    engine = SnipsNLUEngine()
    engine.fit(dataset)
    parsing = engine.parse(unicode(message.text))

    intentName = parsing["intent"]["intentName"]
    entidad = ""
    try:
        enti = parsing["slots"][0]
        entidad = enti["rawValue"]
    except:
        pass
    if intentName and entidad:
        print("good")

        if intentName == "descripcion":
            print("si entraaaaaaaaaaaaaaaaaaa")
            predicado = "http://usefulinc.com/ns/doap#description"
            campo = consultaSparql1(entidad, predicado)
        elif intentName == "imagen":
            predicado = "http://schema.org/image"
            campo = consultaSparql1(entidad, predicado)
        elif intentName == "igualEn":
            predicado = "http://www.w3.org/2002/07/owl#sameAs"
            campo = consultaSparql1(entidad, predicado)
        elif intentName == "igualKi":
            predicado = "http://www.w3.org/2002/07/owl#sameAs"
            campo = consultaSparql1(entidad, predicado)
        elif intentName == "nombreCientifico":
            predicado = "http://lod.taxonconcept.org/ontology/txn.owl#scientificName"
            campo = consultaSparql1(entidad, predicado)
        bot.send_message(chatid, campo)
    else:
        print("bad")
        lista = consultaSparql()
        bot.send_message(chatid, "perro")
Example #15
def train():
    """
    Opens all JSON files in DATASET_PATH, fits them to an nlu-engine, and saves the engine at TRAINED_ENGINE_PATH.

    :return: Nothing.
    """
    file_paths = [
        join(DATASET_PATH, file) for file in os.listdir(DATASET_PATH)
        if isfile(join(DATASET_PATH, file))
    ]
    nlu_engine = SnipsNLUEngine(CONFIG_EN)

    for file_path in file_paths:
        with io.open(file_path, encoding="utf-8") as file:
            sample_dataset = json.load(file)

        # NOTE: fit() retrains the engine from scratch on each call, so after
        # this loop the engine only reflects the last dataset in file_paths.
        nlu_engine.fit(sample_dataset)

    save_engine(nlu_engine)
Example #16
    def loadntrain(self, rootpath='./datasets/*.json'):
        paths = sorted(glob.glob(rootpath))
        self.models = []
        for i, dset in enumerate(paths):
            with io.open(dset) as f:
                dataset = json.load(f)
            model = SnipsNLUEngine(config=CONFIG_EN)
            model = model.fit(dataset)
            self.models.append(model)
            print(f"{i+1}. Trained for {dset}")
        print(f"Training for {len(paths)} datasets completed")
Example #17
def main():
    # Load dataset for parsing
    with open(DATASET_PATH) as f:
        dataset = json.load(f)

    global publisher
    global nlu_engine
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine.fit(dataset)

    # Initialize Ros node and the Topic subscriber and publisher
    rospy.init_node(str(ROS_NODE_NAME))

    publisher = rospy.Publisher(str(ROS_PUBLISHER_TOPIC),
                                String,
                                queue_size=10)
    rospy.Subscriber(str(ROS_SUBSCRIBER_TOPIC), String, callback)
    print("ROS node '%s' started. Listening from '%s' (ctrl-C to exit)..." %
          (ROS_NODE_NAME, ROS_SUBSCRIBER_TOPIC))
    rospy.spin()
    print("Ctrl-C received. Shutting down ROS node '%s'!" % ROS_NODE_NAME)
Example #18
class LanguageParser(pluginmanager.IPlugin, PluginStorage):
    """
      interface to parse input text
    """
    def __init__(self):
        super(pluginmanager.IPlugin, self).__init__()
        self._plugins = {}
        self._pre_train_json = dict()
        self._pre_train_json['intents'] = {}
        self._pre_train_json['entities'] = {}
        self._pre_train_json['language'] = 'en'
        self.nlu_engine = SnipsNLUEngine()

    def train(self, plugins):
        self._generate_pre_train_json(plugins)
        self.nlu_engine.fit(self._pre_train_json)

    def _generate_pre_train_json(self, plugins):
        for plugin in plugins:
            intent = dict()
            intent['utterances'] = list()
            _data = list()
            _data.append(dict({'text': plugin.get_name()}))
            intent['utterances'].append(dict({"data": _data}))
            intent_name = '_'.join(re.findall(r"[\w']+", plugin.get_name()))
            self._pre_train_json['intents'][intent_name] = intent
            self._plugins[intent_name] = plugin

            # handle sub commands (recursive)
            self._generate_pre_train_json(plugin.get_plugins().values())

    def identify_action(self, action):
        parsed_action = self.nlu_engine.parse(action)
        print(parsed_action)
        intent_name = parsed_action['intent']['intentName']
        if intent_name not in self._plugins:
            return None
        return self._plugins[intent_name]
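For reference, a single plugin named "check weather" would lead _generate_pre_train_json to build a dataset dict shaped like this (a sketch that follows from the code above):

# Resulting _pre_train_json for one plugin named "check weather":
# {
#     "language": "en",
#     "entities": {},
#     "intents": {
#         "check_weather": {
#             "utterances": [{"data": [{"text": "check weather"}]}]
#         }
#     }
# }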
Example #19
def make_nlu_model_json(fname):
    # Build the dataset directly from the given YAML file;
    # Dataset.from_yaml_files takes a list of file paths.
    dataset = Dataset.from_yaml_files("en", [fname])

    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine = nlu_engine.fit(dataset)
    text = "Please turn the light on in the kitchen"
    parsing = nlu_engine.parse(text)
    return json.dumps(parsing, indent=2)
Example #20
    class __impl:

        __nlu_engine = None

        def __init__(self):
            print('Load NLU Engine')
            print(
                '-----------------------------------------------------------------'
            )

            try:
                with io.open("oms_dataset.json") as f:
                    dataset = json.load(f)
            except IOError as e:
                print('I/O error({0}): {1}'.format(e.errno, e.strerror))
                sys.exit()

            load_resources('snips_nlu_en')
            self.__nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
            self.__nlu_engine.fit(dataset)
            self.__nlu_engine.to_byte_array()  # note: the returned bytes are not used here

        def parse_text(self, text):
            return self.__nlu_engine.parse(text)
Example #21
def init(seed):
    with io.open("Dataset/dataset.json", encoding="utf8") as f:
        sample_dataset = json.load(f)

    print("initialising Lynda")

    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN

    # Creating the NLU engine (seeded for reproducible training)
    nlu_engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed)

    # Training the NLU model
    nlu_engine.fit(sample_dataset)

    print("model created")

    # Saving the trained model
    nlu_engine.persist('model')

    print("model dumped")
Example #22
def main():
    """
    This builds a training dataset, trains an NLU engine with it, and saves that engine. This must be done any time a
    new command is added or utterances are edited for a command. If an NLU engine already exists, it is deleted.
    """
    training_json = json.loads(build_training_dataset())
    engine_path = os.path.join('Voithos', 'utilities', 'NLU')

    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine = nlu_engine.fit(training_json)

    try:
        nlu_engine.persist(engine_path)
    except PersistingError:
        shutil.rmtree(engine_path)
        nlu_engine.persist(engine_path)
Example #23
def train_nlu():
    with io.open("training_data/dataset.json") as f:
        sample_dataset = json.load(f)

    nlu_engine = SnipsNLUEngine()

    print("Snips training started")
    train_start = datetime.datetime.now()
    nlu_engine = nlu_engine.fit(sample_dataset)

    if os.path.exists(model_path):
        shutil.rmtree(model_path, ignore_errors=True)
    nlu_engine.persist(model_path)

    train_end = datetime.datetime.now()
    print("Total time to train the Snips model: {0}".format(train_end -
                                                            train_start))
Example #24
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    # The language setting code can be changed
    language_code = 'ko-KR'  # a BCP-47 language tag

    # Feed the language dataset into Snips NLU
    with io.open("./lights_dataset_train_ko.json", encoding="utf8") as f:
        sample_dataset = json.load(f)

    nlu_engine = SnipsNLUEngine(config=CONFIG_KO)
    nlu_engine = nlu_engine.fit(sample_dataset)

    print("성공")

    # 전처리 종료

    # 음성인식 시작

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        # Start the infinite loop
        listen_print_loop(responses, nlu_engine)
Example #25
def imprimirmensaje(message):
    campo = ""
    predicado = ""
    bandera1 = False
    chatid = message.chat.id
    with io.open('dataset.json') as file:
        dataset = json.load(file)
    engine = SnipsNLUEngine()
    engine.fit(dataset)
    parsing = engine.parse(unicode(message.text))
    entidad = ""
    intentName = ""
    try:
        intentName = parsing["intent"]["intentName"]
        try:
            enti = parsing["slots"][0]
            entidad = enti["rawValue"]
        except:
            bandera1 = True
        if intentName and entidad:
            print("good")

            if intentName == "descripcion":
                print("si entraaaaaaaaaaaaaaaaaaa")
                predicado = "http://usefulinc.com/ns/doap#description"
                campo = consultaSparql1(entidad, predicado)

            elif intentName == "imagen":
                predicado = "http://schema.org/image"
                campo = consultaSparql1(entidad, predicado)
                campo = ("La imagen de " + entidad + " es: " + campo[0])
            elif intentName == "igualEn":
                predicado = "http://www.w3.org/2002/07/owl#sameAs"
                campo = consultaSparql2(entidad, predicado, "en")
                campo = ("La traduccion de " + entidad + " en ingles es: " +
                         campo[0])
            elif intentName == "igualKi":
                predicado = "http://www.w3.org/2002/07/owl#sameAs"
                campo = consultaSparql2(entidad, predicado, "ki")
                campo = ("La traduccion de " + entidad + " en kitchwa es: " +
                         campo[0])
            elif intentName == "nombreCientifico":
                predicado = "http://lod.taxonconcept.org/ontology/txn.owl#scientificName"
                campo = consultaSparql1(entidad, predicado)
                campo = ("El nombre cientifico de " + entidad + " es: " +
                         campo[0])
            mi_bot.reply_to(message, campo)
        elif intentName == "allAnimal":
            cam = consultaSparql()
            campo = ""
            for ca in cam:
                campo += ca + ", "
            mi_bot.reply_to(message, campo)
        else:
            print(entidad)
            print(intentName)
            print("bad")

        if bandera1:
            print("¿Dime de animal deseas saber la descripción?")

    except:
        campo = "No entiendo tu pregunta, puedes preguntarme de nuevo"
        mi_bot.reply_to(message, campo)
Example #26
from __future__ import unicode_literals, print_function

import io
import json

from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.default_configs import CONFIG_EN, CONFIG_DE

with io.open("lights_dataset_de.json") as f:
    sample_dataset = json.load(f)

# load_resources("en")
# nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
load_resources("de")
nlu_engine = SnipsNLUEngine(config=CONFIG_DE)
nlu_engine.fit(sample_dataset)
nlu_engine.persist("models/current")

Example #27
import json
from pprint import pprint
from snips_nlu import load_resources, SnipsNLUEngine

with open('./dataset.json') as f:
    data = json.load(f)
load_resources('en')

with open('./config_en.json') as f:
    nlu_engine = SnipsNLUEngine(config=json.load(f))
nlu_engine.fit(data)

with open('./trained_engine.json', 'w+') as f:
    f.write(json.dumps(nlu_engine.to_dict()))
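Reading the engine back from that JSON file should work with from_dict, which older snips-nlu releases pair with to_dict (an assumption here; newer releases use persist/from_path instead):

with open('./trained_engine.json') as f:
    loaded_engine = SnipsNLUEngine.from_dict(json.load(f))  # counterpart of to_dict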
Example #28
import io
import json
from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN
from pathlib import Path

# logo is used by funcs.hum_type() at the bottom of this file
logo = Path('project_4/logo.txt').read_text()
#print(logo)
#gen_help = Path("project_4/help.txt").read_text()
#print(gen_help)
#welcome = open('project_4/welcome.txt', "r").read()

with io.open("project_4/dataset.json") as f:
    dataset = json.load(f)
nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
nlu_engine = nlu_engine.fit(dataset)

############# Custom classes

#from Character as Character
import funcs
import Layout
Layout = Layout.Layout
import Info
Info = Info.Info
import Store
import Character
Character = Character.Character
#################

funcs.hum_type(logo, speed=550)
Example #29
"""
@author: yanni
"""

import io
import json

from snips_nlu import SnipsNLUEngine

path = '/Users/yanni/PycharmProjects/chatbot/src/'
### train Slots Detection Model

#!snips-nlu generate-dataset en {path}/Movie_intent.yaml {path}/Movie_entity.yaml  > {path}/Movie_dataset.json
with io.open(path + 'Movie_dataset.json') as f:
    sample_dataset = json.load(f)

nlu_engine = SnipsNLUEngine()

nlu_engine.fit(sample_dataset)

nlu_engine.persist(path + 'Movie_Slots_Detection')

#!snips-nlu generate-dataset en {path}/Aspect_intent.yaml {path}/Aspect_entity.yaml  > {path}/Aspect_dataset.json
with io.open(path + 'Aspect_dataset.json') as f:
    aspect_dataset = json.load(f)

nlu_engine = SnipsNLUEngine()

nlu_engine.fit(aspect_dataset)

nlu_engine.persist(path + 'Aspect_Slots_Detection')
Example #30
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import sys
import json
from snips_nlu import load_resources, SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_ES
import io

# Creating the training file
load_resources("es")
reload(sys)
sys.setdefaultencoding('utf8')
with io.open("dataset.json") as f:
    dataset = json.load(f)
engine = SnipsNLUEngine(config=CONFIG_ES)
engine.fit(dataset)
engine_json = json.dumps(engine.to_dict())
with io.open("trained.json", mode="w") as f:
    f.write(unicode(engine_json))
Example #31
from __future__ import unicode_literals, print_function

import io
import json

from snips_nlu import SnipsNLUEngine, load_resources

with io.open("sample_dataset.json") as f:
    sample_dataset = json.load(f)

with io.open("configs/config_en.json") as f:
    config = json.load(f)

load_resources(sample_dataset["language"])
nlu_engine = SnipsNLUEngine(config=config)
nlu_engine.fit(sample_dataset)

text = "What will be the weather in San Francisco next week?"
parsing = nlu_engine.parse(text)
print(json.dumps(parsing, indent=2))
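The printed result has roughly this shape; the intent name and probability depend on the dataset ("sampleGetWeather" is assumed from the sample dataset):

# {
#   "input": "What will be the weather in San Francisco next week?",
#   "intent": {
#     "intentName": "sampleGetWeather",
#     "probability": 0.97
#   },
#   "slots": [ ... resolved slot values ... ]
# }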