def test_main_train_engine(self):
    # Given
    args = [BEVERAGE_DATASET_PATH, self.tmp_file_path]
    with patch.object(sys, "argv", mk_sys_argv(args)):
        # When
        main_train_engine()

        # Then
        if not os.path.exists(self.tmp_file_path):
            self.fail("No trained engine generated")
        msg = "Failed to create an engine from engine dict."
        with self.fail_if_exception(msg):
            with io.open(self.tmp_file_path, "r", encoding="utf8") as f:
                trained_engine_dict = json.load(f)
            SnipsNLUEngine.from_dict(trained_engine_dict)
def test_sample_configs_should_work(self):
    # Given
    dataset = self.sample_dataset
    for language in get_all_languages():
        # When
        config_file = "config_%s.json" % language
        config_path = os.path.join(SAMPLES_PATH, "configs", config_file)
        with io.open(config_path) as f:
            config = json.load(f)
        dataset[LANGUAGE] = language
        engine = SnipsNLUEngine(config).fit(dataset)
        result = engine.parse("Please give me the weather in Paris")

        # Then
        intent_name = result[RES_INTENT][RES_INTENT_NAME]
        self.assertEqual("sampleGetWeather", intent_name)
def debug_training(dataset_path, config_path=None):
    with io.open(os.path.abspath(dataset_path), "r", encoding="utf8") as f:
        dataset = json.load(f)

    load_resources(dataset["language"])

    if config_path is None:
        config = NLUEngineConfig()
    else:
        with io.open(config_path, "r", encoding="utf8") as f:
            config = NLUEngineConfig.from_dict(json.load(f))

    engine = SnipsNLUEngine(config).fit(dataset)

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if isinstance(query, bytes):
            query = query.decode("utf8")
        if query == "q":
            break
        print(json.dumps(engine.parse(query), indent=2))
def debug_inference(engine_path):
    with io.open(os.path.abspath(engine_path), "r", encoding="utf8") as f:
        engine_dict = json.load(f)

    load_resources(engine_dict["dataset_metadata"]["language_code"])
    engine = SnipsNLUEngine.from_dict(engine_dict)

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if isinstance(query, bytes):
            query = query.decode("utf8")
        if query == "q":
            break
        print(json.dumps(engine.parse(query), indent=2))
def main_train_engine():
    args = vars(parse_train_args(sys.argv[1:]))

    dataset_path = args.pop("dataset_path")
    with io.open(dataset_path, "r", encoding="utf8") as f:
        dataset = json.load(f)

    if args.get("config_path") is not None:
        config_path = args.pop("config_path")
        with io.open(config_path, "r", encoding="utf8") as f:
            config = json.load(f)
    else:
        config = NLUEngineConfig()

    load_resources(dataset["language"])

    print("Create and train the engine...")
    engine = SnipsNLUEngine(config).fit(dataset)

    output_path = args.pop("output_path")
    serialized_engine = bytes(json.dumps(engine.to_dict()), encoding="utf8")
    with io.open(output_path, "w", encoding="utf8") as f:
        f.write(serialized_engine.decode("utf8"))
    print("Saved the trained engine to %s" % output_path)
def main_engine_inference():
    args = vars(parse_inference_args(sys.argv[1:]))

    training_path = args.pop("training_path")
    with io.open(os.path.abspath(training_path), "r", encoding="utf8") as f:
        engine_dict = json.load(f)
    engine = SnipsNLUEngine.from_dict(engine_dict)
    language = engine._dataset_metadata[  # pylint: disable=protected-access
        "language_code"]
    load_resources(language)

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if isinstance(query, bytes):
            query = query.decode("utf8")
        if query == "q":
            break
        print(json.dumps(engine.parse(query), indent=2))
# Download resources (you must run this as administrator)
import subprocess

subprocess.run(["python", "-m", "snips_nlu", "download", "es"])

# ----
import io
import json

# Load the dataset
with io.open("trainingDatasets/multiple.json") as f:
    sample_dataset = json.load(f)

# Train
from snips_nlu import SnipsNLUEngine

nlu_engine = SnipsNLUEngine()
print(sample_dataset)
nlu_engine.fit(sample_dataset)

# Test
parsing = nlu_engine.parse(
    "Creame una máquina virtual con seis cores y 3 gigas de ram en Europa")
print(json.dumps(parsing, indent=2))
print("---------------------")
parsing = nlu_engine.parse("Dame de alta un data lake en Asia")
print(json.dumps(parsing, indent=2))
import io
import json
import datetime
from collections import defaultdict

from flask import Flask, jsonify, request
from flask_cors import CORS
from flask_pymongo import PyMongo
from odo import odo
from snips_nlu import SnipsNLUEngine, load_resources

app = Flask(__name__)
CORS(app)

app.config['MONGO_DBNAME'] = 'apiwatch'
app.config['MONGO_URI'] = 'mongodb://51.38.49.225:27017/apiwatch'
mongo = PyMongo(app)

with io.open("./datajson.json") as f:
    sample_dataset = json.load(f)

load_resources("fr")
nlu_engine = SnipsNLUEngine()
nlu_engine.fit(sample_dataset)


@app.route('/star', methods=['GET'])
def get_all_stars():
    star = mongo.db.apiwatch
    output = []
    for s in star.find():
        output.append({'texte': s['texte']})
    return jsonify({'result': output})


@app.route('/star', methods=['POST'])
def add_star():
    texte = request.json['texte']
def nlu(self):
    global snips_engines
    # Try to load the NLU engine for the given language
    try:
        if self.lang in snips_engines.keys():
            nlu_engine = snips_engines[self.lang]
        else:
            print("*** " + self.lang + " ***")
            snips_engines[self.lang] = SnipsNLUEngine.from_path(
                "models/" + str(self.lang))
            nlu_engine = snips_engines[self.lang]
        self.nlu_parsing = nlu_engine.parse(self.text)
        if self.probability >= float(
                self.nlu_parsing["intent"]["probability"]):
            neural = self.nn(str(self.lang), self.text)
            if neural is False:
                self.nlu_parsing["intent"]["probability"] = float(0)
            elif neural is True:
                self.nlu_parsing["intent"]["probability"] = float(0.9)
    except Exception as e:
        print(e)
        # Use language detection from the translator, then try to load the NLU engine again
        self.translate(src=True)
        try:
            if self.lang in snips_engines.keys():
                nlu_engine = snips_engines[self.lang]
            else:
                snips_engines[self.lang] = SnipsNLUEngine.from_path(
                    "models/" + str(self.lang))
                nlu_engine = snips_engines[self.lang]
            self.nlu_parsing = nlu_engine.parse(self.text)
            if self.probability >= float(
                    self.nlu_parsing["intent"]["probability"]):
                neural = self.nn(str(self.lang), self.text)
                if neural is False:
                    self.nlu_parsing["intent"]["probability"] = float(0)
                elif neural is True:
                    self.nlu_parsing["intent"]["probability"] = float(0.9)
        except Exception as e:
            print(e)
            # Load the default NLU engine, in case language detection failed
            if self.mainlang in snips_engines.keys():
                nlu_engine = snips_engines[self.mainlang]
            else:
                print("*** " + self.lang + " ***")
                snips_engines[self.mainlang] = SnipsNLUEngine.from_path(
                    "models/" + self.mainlang)
                nlu_engine = snips_engines[self.mainlang]
            self.nlu_parsing = nlu_engine.parse(self.translated)
            if self.probability >= float(
                    self.nlu_parsing["intent"]["probability"]):
                neural = self.nn(str(self.lang), self.translated)
                if neural is False:
                    self.nlu_parsing["intent"]["probability"] = float(0)
                elif neural is True:
                    self.nlu_parsing["intent"]["probability"] = float(0.9)

    self.nlu_parsing["lang"] = self.lang
    try:
        slots = self.nlu_parsing["slots"]
        for x in slots:
            self.nlu_parsing[x["slotName"]] = x["value"]["value"]
    except Exception as e:
        print(e)
    print(self.nlu_parsing)
import io
from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN
from snips_nlu_parsers import BuiltinEntityParser
import json
from twilio.twiml.messaging_response import MessagingResponse
from twilio.rest import Client
import sqlite3
from pprint import pprint
from jsonmerge import merge
import os
from datetime import date

account_sid = 'ACdba21fe9a93646530323131c9062a9d6'
auth_token = '60bc05d98fa863102d8f9bb40ed3cb7a'
client = Client(account_sid, auth_token)

default_engine = SnipsNLUEngine()

list_doc = [
    "doctor rekha pradeep", "dr rekha pradeep", "dr arvind shenoi",
    "doctor arvind shenoi", "doctor ashwin", "dr ashwin", "doctor ajay",
    "dr ajay", "doctor urvashi", "dr urvashi", "doctor kanjus", "dr kanjus",
    "doctor akshay", "dr akshay", "doctor sahu", "dr sahu"
]


def init_snipsnlu():
    engine = SnipsNLUEngine(config=CONFIG_EN)
    with io.open("proj_new.json") as f:
        dataset = json.load(f)
    engine.fit(dataset)
    return engine
import json

from rest_framework.response import Response
from rest_framework.views import APIView

from snips_nlu import SnipsNLUEngine
from snips_nlu.dataset import Dataset
from snips_nlu.default_configs import CONFIG_EN
from pprint import pprint

# Create your views here.
dataset = Dataset.from_yaml_files("en", ["data.yaml"])
j = dataset.json
m = json.dumps(j)
sample_dataset = json.loads(m)
nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
nlu_engine = nlu_engine.fit(sample_dataset, force_retrain=False)


class MakeYAMLFromDB(APIView):
    def get(self, request):
        global nlu_engine
        d = request.data
        el = []
        entities = list(Entity.objects.all().values(
            'name', 'automatically_extensible', 'use_synonyms',
            'matching_strictness'))
        for e in entities:
            jm = e
            jm['type'] = 'entity'
            lll = list(
import io
import json

from snips_nlu import SnipsNLUEngine, load_resources

load_resources("es")

with io.open("trained.json") as f:
    engine_dict = json.load(f)

engine = SnipsNLUEngine.from_dict(engine_dict)

phrase = input("Pregunta: ")
r = engine.parse(phrase)
print(json.dumps(r, indent=2))
import io
import json
import requests

from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.default_configs import CONFIG_FR

with io.open("dataset.json") as f:
    sample_dataset = json.load(f)

load_resources("fr")  # specify the language
nlu_engine = SnipsNLUEngine(config=CONFIG_FR)  # extra configuration for the language
nlu_engine.fit(sample_dataset)

api_address = 'http://api.openweathermap.org/data/2.5/forecast?appid=00bd3724beb92a272a9c8fcb32a99867&q='

while True:
    x = input("tapez votre question sur le temps? ")
    # parsing = engine.parse(u"" + x)
    # res = json.dumps(parsing, indent=2)
    # city = extract city name
    print(x)
class BrewSnips:

    def __init__(self, json_arg):
        self.nlu_engine = None
        self.json_arg = json_arg

    def simple_text_cleaner(self, query_text):
        """
        Replace, encode, remove unnecessary texts to ensure alignment w/ yaml format.
        :param query_text: ( String ) Query
        :return: ( String ) Cleaned-format query
        """
        # Encode into "utf-8" format
        query_text = query_text.encode(encoding='utf-8').decode("utf-8")
        # Replace $, %, + with relative words
        query_text = query_text.replace("$", "")
        query_text = query_text.replace("%", "percent")
        query_text = query_text.replace("+", "plus")
        # Remove punctuation
        query_text = query_text.translate(
            str.maketrans('', '', string.punctuation))
        # Replace "’" to avoid parsing errors
        query_text = query_text.replace("’", "")
        return query_text

    def parse_snips_intent(self):
        """
        Parse original data.json into Snips NLU Engine training data in yaml format.
        Convert into a yaml file through the command line prompt:
        'snips-nlu generate-dataset en input-yaml-file > output-json-file'
        """
        # Get original data.json in a DataFrame
        data_df = DataProcessing(
            f"{getcwd()}/data_lake/{self.json_arg}").retrieve_process_json()
        # Get list of unique Intents
        intent_list = list(set(data_df["Intent"]))
        # Load SpaCy NLP large corpus
        spacy_nlp_engine = load('en_core_web_lg')
        # Init yaml object
        yaml = ruamel.yaml.YAML()
        # Set explicit start to True
        yaml.explicit_start = True

        # Parse by Intents
        for intent_name in intent_list:
            # "yes" and "no" are reserved values in yaml files.
            # To avoid parsing errors, an "s" is appended to the intent name.
            if intent_name == "yes" or intent_name == "no":
                intent_dict = {"type": "intent", "name": f"{intent_name}s"}
            else:
                intent_dict = {"type": "intent", "name": intent_name}

            # Init lists for Slots + Utterances
            slots_value_list = []
            utt_value_list = []
            # Subset current Intent data
            subset_data = data_df[data_df["Intent"] ==
                                  intent_name].reset_index(drop=True)
            # Get current Intent queries
            intent_query_words = list(subset_data["Query"])
            # Get the 4-grams and convert into a list
            word_ngrams = (pd.Series(ngrams(intent_query_words, 4))).to_list()
            # Random sample 80% of each Intent as training phrases for the NLU Engine
            sample_ngrams = sample(word_ngrams, int(len(subset_data) * 0.8))

            # Start parsing each query
            for phrases in sample_ngrams:
                # Join phrases back into one single sentence
                full_text = " ".join(phrases)
                # Parse entities of the text through the SpaCy NLP engine
                parse_phrases = spacy_nlp_engine(full_text)
                # Set slots
                if len(parse_phrases.ents) > 0:
                    # Get entity label and text, if any
                    for nlp_entity in parse_phrases.ents:
                        entity_label = nlp_entity.label_
                        entity_text = nlp_entity.text
                        # Construct "slot" for name and entity
                        slot_entities = {
                            "name": entity_label,
                            "entity": entity_label
                        }
                        # Replace text with entity label
                        full_text = full_text.replace(
                            entity_text, f"[{entity_label}]({entity_text})")
                        # Store unique "slots"
                        if slot_entities not in slots_value_list:
                            slots_value_list.append(slot_entities)
                    # Store "utterances" from the ngram
                    utt_value_list.append(full_text)

            # Set slots in intent dictionary
            if len(slots_value_list) > 0:
                intent_dict["slots"] = slots_value_list
            # Set utterances in intent dictionary
            if len(utt_value_list) > 0:
                intent_dict["utterances"] = utt_value_list
            # If there are no utterances found, use the original ngrams
            else:
                intent_dict["utterances"] = [
                    " ".join(gram) for gram in sample_ngrams
                ]

            # Append into output yaml
            with open(f"{getcwd()}/data_lake/intent_ngram.yaml", "a") as file:
                yaml.dump(intent_dict, file)

    def get_nlu_engine(self):
        """
        Get the JSON file with our intents and entities tagged from part of our
        input data. Update the Snips NLU Engine.
        :return: ( Snips NLU Object ) Customized Snips NLU Engine
        """
        # Get parsed intent ngram JSON file
        with io.open(f"{getcwd()}/data_lake/intent_ngram.json") as f:
            custom_dataset = json.load(f)
        # Init Snips NLU Engine
        self.nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
        # Fit sample data into the NLU Engine for customization
        self.nlu_engine = self.nlu_engine.fit(custom_dataset)
        return self.nlu_engine

    def parse_intent_name_prob(self, text):
        """
        Parse new IntentName and Probability based on the customized Snips NLU Engine.
        :param text: ( String ) Query
        :return: ( List ) Intent name and probability score
        """
        # Parse queries to determine intent name and probability score
        parsing = self.nlu_engine.parse(text)
        # Get intent name
        intent_name = parsing["intent"]["intentName"]
        # Get the probability
        intent_prob = parsing["intent"]["probability"]
        return [intent_name, intent_prob]

    def brew_intent_score(self):
        """
        Generate a new intent name and probability score for each query.
        Parse them into JSON format and write into a JSON file.
        """
        # Get customized Snips NLU Engine
        self.get_nlu_engine()
        # Get the original json data
        with io.open(f"{getcwd()}/data_lake/{self.json_arg}") as f:
            data_df = json.load(f)
        # Convert list of lists into a DataFrame w/ relative columns
        data_content_df = pd.DataFrame(data_df, columns=["Query", "Intent"])
        # Set intent similarity score w/ new intent
        data_content_df["NLU_Intent_Score"] = data_content_df["Query"].apply(
            self.parse_intent_name_prob)
        # Split into individual columns: Intent and Score
        data_content_df[["NLU_Intent", "NLU_Score"]] = pd.DataFrame(
            data_content_df["NLU_Intent_Score"].tolist(),
            index=data_content_df.index)
        # Drop unused column
        data_content_df.drop("NLU_Intent_Score", axis=1, inplace=True)
        # Store all output into a feather file
        write_feather(data_content_df,
                      f"{getcwd()}/data_lake/SnipsNLUData.feather")
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

# snips-nlu generate-dataset en dataset.yaml > dataset.json
engine = SnipsNLUEngine(config=CONFIG_EN)

with io.open('dataset.json') as f:
    dataset = json.load(f)

engine.fit(dataset)

parsing = engine.parse('Please give me some lights in the entrance !')
print(json.dumps(parsing, indent=2))
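# The comment above relies on the snips-nlu CLI to turn dataset.yaml into
# dataset.json. The same conversion can also be done in Python; this is a
# minimal sketch assuming a local "dataset.yaml" file in the Snips YAML format
# (the file name is an assumption, not part of the snippet above).
import json

from snips_nlu.dataset import Dataset

dataset = Dataset.from_yaml_files("en", ["dataset.yaml"])

# dataset.json is a plain dict equivalent to the CLI output; it can be passed
# straight to SnipsNLUEngine.fit or written to disk for later use.
with open("dataset.json", "w", encoding="utf8") as f:
    json.dump(dataset.json, f, indent=2)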
from __future__ import unicode_literals, print_function

import io
import json

from snips_nlu import SnipsNLUEngine, load_resources

with io.open("sample_dataset.json") as f:
    sample_dataset = json.load(f)

with io.open("configs/config_en.json") as f:
    config = json.load(f)

load_resources(sample_dataset["language"])
nlu_engine = SnipsNLUEngine(config=config)
nlu_engine.fit(sample_dataset)

text = "What will be the weather in San Francisco next week?"
parsing = nlu_engine.parse(text)
print(json.dumps(parsing, indent=2))
import io
import json

from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.default_configs import CONFIG_EN

load_resources("en")
engine = SnipsNLUEngine(config=CONFIG_EN)
default_engine = SnipsNLUEngine()
def fit(self, dataset):
    self.engine = SnipsNLUEngine(self.config).fit(dataset)
    return self
def label_data_with_rasa_nlu_model(lang='en', save="", out='practice'):
    """
    Label counterfactual training data
    :param lang: abbreviate language name of model
    :param save: path name where model is saved
    :return: csv file
    :rtype: file
    """
    from rasa.nlu.model import Interpreter
    from rasa.nlu.components import ComponentBuilder
    from zipfile import ZipFile
    from snips_nlu import SnipsNLUEngine
    import pickle
    import json

    model = model_path / "rasa_semeval_2020_model_task2_{}".format(save)
    if Path(model).exists():
        print("\n--> Loading Rasa model 1...")
        model1 = str(model / "nlu_20200515-042204")
        nlu_engine, nlu_engine2 = "", ""
        with codecs.open(model_path / "builder_task2_{}.pkl".format(save),
                         "rb") as ant:
            builder = pickle.load(ant)
        nlu_engine = Interpreter.load(model1, builder)

        print("\n--> Loading Snips model 2...")
        # model2 = str(model / "nlu_20200515-185057")
        # nlu_engine2 = Interpreter.load(model2, builder)

        print("\n--> Loading Snips model 3...")
        model3 = str(model_path / "snips_semeval_2020_model_task2_ct_v5")
        nlu_engine3 = SnipsNLUEngine.from_path(model3)

        print("\n--> Loading Snips model 4...")
        model4 = model_path / "snips_semeval_2020_model_task2_ct_v6"
        nlu_engine4 = SnipsNLUEngine.from_path(model4)

        print("\n--> Loading Snips model 5...")
        model5 = model_path / "snips_semeval_2020_model_task2_ct_v7"
        nlu_engine5 = SnipsNLUEngine.from_path(model5)

        print("\n--> Loading Snips model 6...")
        model6 = str(model / "nlu_20200513-075312_desc")
        nlu_engine6 = Interpreter.load(model6, builder)

        #
        if out == 'evaluate':
            print("--> [EVALUATION] Start labeling with Rasa model...")
            pd_data = pandas.read_csv(test_task_2)
            id_sent, pred = [], []
            count = 0
            for i, row in pd_data.iterrows():
                sentence = row['sentence']
                predict = nlu_engine.parse(sentence, time=3)
                antecedent, consequent = [], []
                # print("-- entities: ", predict['entities'])
                for slot in predict['entities']:
                    if slot['entity'] == "consequent":
                        consequent = [slot['start'], slot['end']]
                    if slot['entity'] == "antecedent":
                        antecedent = [slot['start'], slot['end']]
                if len(antecedent) == 0 and len(consequent) == 0:
                    """
                    predict2 = nlu_engine2.parse(sentence)
                    antecedent, consequent = [], []
                    for slot in predict['entities']:
                        if slot['entity'] == "consequent":
                            consequent = [slot['start'], slot['end']]
                        if slot['entity'] == "antecedent":
                            antecedent = [slot['start'], slot['end']]
                    """
                    if len(predict['entities']) == 0:
                        predict3 = nlu_engine3.parse(sentence)
                        antecedent, consequent = [], []
                        for slot in predict3['slots']:
                            if slot['slotName'] == "Consequent":
                                end_id = slot['range']['end']
                                start_id = slot['range']['start']
                                # if end_id != -1:
                                end_id = end_id - 1
                                consequent = [start_id, end_id]
                            if slot['slotName'] == "Antecedent":
                                end_id = slot['range']['end']
                                start_id = slot['range']['start']
                                # if end_id != -1:
                                end_id = end_id - 1
                                antecedent = [start_id, end_id]
                        if len(predict3['slots']) == 0:
                            predict4 = nlu_engine4.parse(sentence)
                            antecedent, consequent = [], []
                            for slot in predict4['slots']:
                                if slot['slotName'] == "Consequent":
                                    end_id = slot['range']['end']
                                    start_id = slot['range']['start']
                                    # if end_id != -1:
                                    end_id = end_id - 1
                                    consequent = [start_id, end_id]
                                if slot['slotName'] == "Antecedent":
                                    end_id = slot['range']['end']
                                    start_id = slot['range']['start']
                                    # if end_id != -1:
                                    end_id = end_id - 1
                                    antecedent = [start_id, end_id]
                            if len(predict4['slots']) == 0:
                                predict5 = nlu_engine5.parse(sentence)
                                antecedent, consequent = [], []
                                for slot in predict5['slots']:
                                    if slot['slotName'] == "Consequent":
                                        end_id = slot['range']['end']
                                        start_id = slot['range']['start']
                                        # if end_id != -1:
                                        end_id = end_id - 1
                                        consequent = [start_id, end_id]
                                    if slot['slotName'] == "Antecedent":
                                        end_id = slot['range']['end']
                                        start_id = slot['range']['start']
                                        # if end_id != -1:
                                        end_id = end_id - 1
                                        antecedent = [start_id, end_id]
                                if len(predict5['slots']) == 0:
                                    predict6 = nlu_engine6.parse(sentence)
                                    antecedent, consequent = [], []
                                    for slot in predict6['entities']:
                                        if slot['entity'] == "consequent":
                                            consequent = [
                                                slot['start'], slot['end']
                                            ]
                                        if slot['entity'] == "antecedent":
                                            antecedent = [
                                                slot['start'], slot['end']
                                            ]
                                    if len(predict6['entities']) == 0:
                                        count += 1
                                        print("count: ", count)
                                        print(predict4)
                else:
                    print("-------- ok ! ")

                if len(antecedent) == 0:
                    antecedent = [-1, -1]
                if len(consequent) == 0:
                    consequent = [-1, -1]
                sent_id = row['sentenceID']
                eval_out = (sent_id, antecedent[0], antecedent[1],
                            consequent[0], consequent[1])
                pred.append(eval_out)
                # print(eval_out)

            print("no treated: ", count)
            # antecedent_endid
            results = pandas.DataFrame(data=pred,
                                       columns=[
                                           "sentenceID", "antecedent_startid",
                                           "antecedent_endid",
                                           "consequent_startid",
                                           "consequent_endid"
                                       ])
            model_saved = model_path / \
                "rasa_semeval_2020_evaluation_task2_final_{}.csv".format(save)
            results.to_csv(model_saved, index=False)

            from datetime import datetime
            dtime = datetime.now().strftime("%Y%m%d-%H%M%S")
            results_name = "rasa_semeval_2020_evaluation_{}_{}.zip".format(
                save, dtime)
            results.to_csv(model_saved, index=False)
            with ZipFile(model_path / results_name, 'w') as myzip:
                myzip.write(str(model_saved), "subtask2.csv")

        elif out == 'practice':
            print("--> [PRACTICE] Start labeling with Rasa model...")
            test_task_prac_1 = source / "data/task2-train.csv"
            pd_data = pandas.read_csv(test_task_prac_1)
            id_sent, pred = [], []
            for i, row in pd_data.iterrows():
                sentence = row['sentence']
                predict = nlu_engine.parse(sentence)
                antecedent, consequent = [], []
                for slot in predict['slots']:
                    if slot['slotName'] == "consequent":
                        end_id = slot['range']['end']
                        if end_id != -1:
                            end_id = end_id - 1
                        consequent = [slot['range']['start'], end_id]
                    if slot['slotName'] == "antecedent":
                        end_id = slot['range']['end']
                        if end_id != -1:
                            end_id = end_id - 1
                        antecedent = [slot['range']['start'], end_id]
                if len(antecedent) == 0:
                    antecedent = [-1, -1]
                if len(consequent) == 0:
                    consequent = [-1, -1]
                sent_id = row['sentenceID']
                eval_out = (sent_id, antecedent[0], antecedent[1],
                            consequent[0], consequent[1])
                pred.append(eval_out)
                print(eval_out, row['antecedent_startid'],
                      row['antecedent_endid'])

            # antecedent_endid
            results = pandas.DataFrame(data=pred,
                                       columns=[
                                           "sentenceID", "antecedent_startid",
                                           "antecedent_endid",
                                           "consequent_startid",
                                           "consequent_endid"
                                       ])
            results_name = "rasa_semeval_2020_evaluation_practice_{}.zip".format(
                save)
            model_saved = model_path / "subtask2.csv"
            results.to_csv(model_saved, index=False)
            with ZipFile(model_path / results_name, 'w') as myzip:
                myzip.write(str(model_saved), "subtask2.csv")
def label_data_with_snips_nlu_model(lang='en', save="", out='practice'):
    """
    Label counterfactual training data
    :param lang: abbreviate language name of model
    :param save: path name where model is saved
    :return: csv file
    :rtype: file
    """
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN
    from snips_nlu_metrics import compute_train_test_metrics, compute_cross_val_metrics
    import pickle
    import json

    model = model_path / "snips_semeval_2020_model_task2_{}".format(save)
    if Path(model).exists():
        print("\n--> Loading Snips model...")
        nlu_engine = SnipsNLUEngine.from_path(model)

        if out == 'evaluate':
            print("--> [EVALUATION] Start labeling with Snips model...")
            pd_data = pandas.read_csv(test_task_2)
            id_sent, pred = [], []
            for i, row in pd_data.iterrows():
                sentence = row['sentence']
                predict = nlu_engine.parse(sentence)
                antecedent, consequent = [], []
                for slot in predict['slots']:
                    if slot['slotName'] == "Consequent":
                        end_id = slot['range']['end']
                        start_id = slot['range']['start']
                        # if end_id != -1:
                        end_id = end_id - 1
                        consequent = [start_id, end_id]
                    if slot['slotName'] == "Antecedent":
                        end_id = slot['range']['end']
                        start_id = slot['range']['start']
                        # if end_id != -1:
                        end_id = end_id - 1
                        antecedent = [start_id, end_id]
                if len(antecedent) == 0:
                    antecedent = [-1, -1]
                if len(consequent) == 0:
                    consequent = [-1, -1]
                sent_id = row['sentenceID']
                eval_out = (sent_id, antecedent[0], antecedent[1],
                            consequent[0], consequent[1])
                pred.append(eval_out)
                # print(predict)
                # print(predict['input'][antecedent[0]:antecedent[1]])
                print(eval_out)

            # antecedent_endid
            results = pandas.DataFrame(data=pred,
                                       columns=[
                                           "sentenceID", "antecedent_startid",
                                           "antecedent_endid",
                                           "consequent_startid",
                                           "consequent_endid"
                                       ])
            model_saved = model_path / \
                "snips_semeval_2020_evaluation_final_{}.csv".format(save)
            results.to_csv(model_saved, index=False)

            from datetime import datetime
            from zipfile import ZipFile
            dtime = datetime.now().strftime("%Y%m%d-%H%M%S")
            results_name = "snips_semeval_2020_evaluation_task2_{}_{}.zip".format(
                save, dtime)
            results.to_csv(model_saved, index=False)
            with ZipFile(model_path / results_name, 'w') as myzip:
                myzip.write(str(model_saved), "subtask2.csv")
            print("--> [EVALUATION] End labeling and saving with Snips model...")

        elif out == 'practice':
            print("--> [PRACTICE] Start labeling with Snips model...")
            test_task_prac_1 = source / "data/task2-train.csv"
            pd_data = pandas.read_csv(test_task_prac_1)
            id_sent, pred = [], []
            for i, row in pd_data.iterrows():
                sentence = row['sentence']
                predict = nlu_engine.parse(sentence)
                antecedent, consequent = [], []
                for slot in predict['slots']:
                    if slot['slotName'] == "Consequent":
                        end_id = slot['range']['end']
                        if end_id != -1:
                            end_id = end_id - 1
                        consequent = [slot['range']['start'], end_id]
                    if slot['slotName'] == "Antecedent":
                        end_id = slot['range']['end']
                        if end_id != -1:
                            end_id = end_id - 1
                        antecedent = [slot['range']['start'], end_id]
                if len(antecedent) == 0:
                    antecedent = [-1, -1]
                if len(consequent) == 0:
                    consequent = [-1, -1]
                sent_id = row['sentenceID']
                eval_out = (sent_id, antecedent[0], antecedent[1],
                            consequent[0], consequent[1])
                pred.append(eval_out)
                print(eval_out, row['antecedent_startid'],
                      row['antecedent_endid'])

            # antecedent_endid
            results = pandas.DataFrame(data=pred,
                                       columns=[
                                           "sentenceID", "antecedent_startid",
                                           "antecedent_endid",
                                           "consequent_startid",
                                           "consequent_endid"
                                       ])
            model_saved = model_path / \
                "snips_semeval_2020_evaluation_practice_{}.csv".format(save)
            results.to_csv(model_saved, index=False)
            print("--> [PRACTICE] End labeling and saving with Snips model...")
import io
import json

from snips_nlu import SnipsNLUEngine

nlu_engine = SnipsNLUEngine()

with open("./chatbot/data/input.json") as input_file:
    input_dict = json.load(input_file)
user_input = input_dict['input']

with io.open("./chatbot/data/dataset.json") as f:
    sample_dataset = json.load(f)

nlu_engine.fit(sample_dataset)
parsing = nlu_engine.parse(u"%s" % user_input)

with open("./chatbot/data/output.json", "w") as file:
    file.write(json.dumps(parsing, indent=2))
from __future__ import unicode_literals, print_function

import json
from pathlib import Path

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

SAMPLE_DATASET_PATH = Path(__file__).parent / "sample_dataset.json"

with SAMPLE_DATASET_PATH.open(encoding="utf8") as f:
    sample_dataset = json.load(f)

nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
nlu_engine.fit(sample_dataset)

text = "What will be the weather in San Francisco next week?"
parsing = nlu_engine.parse(text)
print(json.dumps(parsing, indent=2))
from snips_nlu import SnipsNLUEngine
import json

engine = SnipsNLUEngine.from_path("demo")

parsing = engine.parse("Turn lights on in the bathroom please")
print("parsing...")
print(json.dumps(parsing, indent=2))
from flask import Flask, request
from snips_nlu import SnipsNLUEngine

app = Flask(__name__)
engine = SnipsNLUEngine.from_path('/nlu_engine/')


@app.route('/query', methods=['POST'])
def parse_query():
    query = request.form['query']
    intent = engine.parse(query)
    return intent


app.run(host='0.0.0.0')
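# Minimal client-side sketch for calling the /query endpoint above. The host
# and port are assumptions (Flask's development server defaults to port 5000),
# and returning the parse dict directly from the view assumes Flask >= 1.1.
import json

import requests

response = requests.post(
    "http://localhost:5000/query",
    data={"query": "Turn lights on in the bathroom please"})
print(json.dumps(response.json(), indent=2))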
class SnipsNLUFramework(NLUFramework):

    # pylint: disable=arguments-differ
    async def construct(  # type: ignore
            self,
            global_config: GlobalConfig,
            skip_language_installations: bool = False) -> None:
        """
        Args:
            global_config: Global configuration for the whole test framework.
            skip_language_installations: A boolean indicating whether to skip
                the installation of required language resources. Defaults to False.
        """
        self.__python = global_config.python
        self.__skip_language_installations = skip_language_installations  # pylint: disable=attribute-defined-outside-init

    async def prepareDataSet(self, data_set: NLUDataSet) -> None:
        last_exception = None
        # Try all language tag derivations, from specific to broad
        for language in Language.get(
                data_set.language).simplify_script().broaden():
            language = language.to_tag()
            try:
                if not self.__skip_language_installations:
                    self._logger.info(
                        "Installing language resources for \"%s\"...",
                        language)
                    subprocess.run([
                        self.__python, "-m", "snips_nlu", "download", language
                    ], check=True)
                self.__language = language
                last_exception = None
                break
            except BaseException as e:  # pylint: disable=broad-except
                last_exception = e
        if last_exception is not None:
            raise last_exception

    async def unprepareDataSet(self) -> None:
        del self.__language  # pylint: disable=attribute-defined-outside-init

    async def train(self, training_data: List[NLUDataEntry]) -> None:
        self.__engine = SnipsNLUEngine(DEFAULT_CONFIGS[self.__language])
        intents = {}
        for intent in {x.intent for x in training_data}:
            utterances = []
            # Lambda not correctly supported in mypy
            for entry in filter(lambda x, i=intent: x.intent == i,
                                training_data):  # type: ignore
                utterances.append({"data": [{"text": entry.sentence}]})
            intents[intent] = {"utterances": utterances}
        self.__engine.fit({
            "language": self.__language,
            "intents": intents,
            "entities": {}
        })

    async def rateIntents(self, sentence: str) -> NLUIntentRating:
        intents = self.__engine.get_intents(sentence)
        return NLUIntentRating(sentence,
                               [(x["intentName"], x["probability"])
                                for x in intents])

    async def cleanupTraining(self) -> None:
        del self.__engine
import sys
import io
import os
import shutil
from io import TextIOWrapper, BytesIO
from contextlib import redirect_stdout
import json

from snips_nlu.cli import training
from snips_nlu.cli import generate_dataset
from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

engine = SnipsNLUEngine(config=CONFIG_EN)


def train_model(_input, name, sub=""):
    input_path = os.path.join("..", "Victor.NLU.Snips", "Datasets", sub, _input)
    if not os.path.exists(input_path) or not os.path.isfile(input_path):
        raise FileNotFoundError(f'The file "{input_path}" does not exist')
    output = input_path.replace(".yaml", ".json")
    with TextIOWrapper(io.FileIO(output, "wb"),
                       sys.stdout.encoding) as buf, redirect_stdout(buf):
        generate_dataset.generate_dataset("en", input_path)
        buf.flush()
        buf.close()
    engine_path = os.path.join("..", "Victor.NLU.Snips", "Engines", sub, name)
    if os.path.isdir(engine_path):
        print("Overwriting existing engine directory {0}.".format(engine_path))
        shutil.rmtree(engine_path)
""" This file is responsible for training and saving the SnipsNLU Engine """ import json from snips_nlu import SnipsNLUEngine from snips_nlu.default_configs import CONFIG_EN engine = SnipsNLUEngine(config=CONFIG_EN) with open("dataset.json") as f: dataset = json.load(f) engine.fit(dataset) engine.persist("persisted_engine")
"models/" + lang + '_transformer', num_labels=numKeys, use_cuda=False, args={ 'reprocess_input_data': True, 'overwrite_output_dir': True, 'num_train_epochs': 15, "train_batch_size": 16, "eval_batch_size": 16, 'no_cache': True, 'use_cached_eval_features': False, 'save_model_every_epoch': False }) transformer_engines[lang] = model snips_engines[lang] = SnipsNLUEngine.from_path("models/" + str(lang)) print("**loaded " + lang + " ***") except Exception as e: print(e) translationList = ["en"] for lgc in translationList: # from de to language mname = 'Helsinki-NLP/opus-mt-de-' + lgc model = MarianMTModel.from_pretrained(mname) tok = MarianSentencePieceTokenizer.from_pretrained(mname) translation_engines[mname] = model translation_tokenizers[mname] = tok
class SnipsInterpreter(Interpreter):

    def __init__(self):
        super(SnipsInterpreter, self).__init__('snips')

        self._meta = None
        self._training_data = None
        self._lang = None
        self._engine = None
        self._entity_parser = None

        self._log.info('Using snips-nlu version %s' % __version__)

    def metadata(self):
        return self._meta

    def lang(self):
        return self._lang

    def training(self):
        result = []
        for intent, data in self._training_data['intents'].items():
            for utterance in data['utterances']:
                result.append({
                    'text': ''.join([u['text'] for u in utterance['data']]),
                    'intent': intent,
                })
        return result

    def fit(self, training_file_path, trained_directory_path):
        filename, _ = os.path.splitext(os.path.basename(training_file_path))

        # TODO check what should be in the base Interpreter class
        trained_path = os.path.join(trained_directory_path,
                                    '%s.trained.json' % filename)
        checksum_path = os.path.join(trained_directory_path,
                                     '%s.checksum' % filename)

        with open(training_file_path) as f:
            training_str = f.read()

        self._training_data = json.loads(training_str)
        self._lang = self._training_data['language']

        self._log.info('Loading resources for language %s' % self._lang)
        load_resources(self._lang)

        same, computed_checksum = self.checksum_match(training_str,
                                                      checksum_path)

        # Checksums match, load the engine from the trained file
        if same and os.path.isfile(trained_path):
            self._log.info('Checksum matched, loading trained engine')
            with open(trained_path) as f:
                self._engine = SnipsNLUEngine.from_dict(json.load(f))
        else:
            self._log.info('Checksum has changed, retraining the engine')
            self._engine = SnipsNLUEngine()
            self._engine.fit(self._training_data)

            with open(trained_path, mode='w') as f:
                json.dump(self._engine.to_dict(), f)

            with open(checksum_path, mode='w') as f:
                f.write(computed_checksum)

        self._entity_parser = BuiltinEntityParser(self._lang)
        self._meta = {
            k: list(v.keys())
            for k, v in
            self._engine._dataset_metadata['slot_name_mappings'].items()
        }

    def parse_entity(self, msg, intent, slot):
        entity_label = self._engine._dataset_metadata[
            'slot_name_mappings'].get(intent, {}).get(slot)

        # TODO try to find a way to retrieve multiple slot values, that's a hard one
        # Maybe we can try matching on _dataset_metadata['entities']
        if entity_label:
            if is_builtin_entity(entity_label):
                parsed = self._entity_parser.parse(msg)
                if parsed:
                    return [parsed[0]['entity']]
            # TODO if slot is not auto-extensible, use fuzzy matching to match with restricted values

        return super(SnipsInterpreter, self).parse_entity(msg, intent, slot)

    def parse(self, msg):
        # TODO manage multiple intents in the same sentence
        parsed = self._engine.parse(msg)

        if parsed['intent'] is None:
            return []

        slots = {}
        # Construct a slot dictionary with the slot value as a list if multiple matched
        for slot in parsed['slots']:
            name = slot['slotName']
            value = slot['value']
            if name in slots:
                slots[name].append(value)
            else:
                slots[name] = [value]

        return [{
            'text': msg,
            'intent': parsed['intent']['intentName'],
            'slots': slots,
        }]
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN  # For German please use CONFIG_DE

engine = SnipsNLUEngine(config=CONFIG_EN)

# dataset.json needs to be changed to the JSON you generated via YAML
with io.open("dataset.json") as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("path/to/directory")
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

nlu_engine = SnipsNLUEngine(config=CONFIG_EN)


def train():
    with io.open('dataset.json') as f:
        dataset = json.load(f)
    print("training.......")
    nlu_engine.fit(dataset)
    nlu_engine.persist("demo")


train()

parsing = nlu_engine.parse("Hey, lights on in the lounge !")
print("parsing...")
print(json.dumps(parsing, indent=2))

# Use the CLI to generate the dataset: snips-nlu generate-dataset en dataset.yaml > dataset.json
def init_snipsnlu():
    engine = SnipsNLUEngine(config=CONFIG_EN)
    with io.open("proj_new.json") as f:
        dataset = json.load(f)
    engine.fit(dataset)
    return engine
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.6
session = tf.Session(config=config)
keras.backend.set_session(session)

# Load dataset for intent detection
try:
    with io.open(str(Path('app', 'static', 'samples', 'dataset.json'))) as fr:
        dataset = json.load(fr)
except Exception as e:
    print("Could not load dataset {}".format(str(e)))

# Train dataset
nlu_engine = SnipsNLUEngine(resources=load_resources("snips_nlu_en"))
nlu_engine.fit(dataset)

# Load class names
print("[INFO]: Loading Classes")
classNames = np.load(
    "/NeMo/examples/applications/asr_service/app/emotion-classification/new_classes.npy"
)

# Load tokenizer pickle file
print("[INFO]: Loading Tokens")
with open(
        '/NeMo/examples/applications/asr_service/app/emotion-classification/new_tokenizer.pickle',
        'rb') as handle:
    Tokenizer = pickle.load(handle)
""" import io import json import sys from snips_nlu import SnipsNLUEngine from snips_nlu.default_configs import CONFIG_EN with open('/Users/ayaali/Documents/Geni_chatbot/Genei/question_new.txt') as fl: mylist = [line.rstrip('\n') for line in fl] questions=["Has DeNovoSeq a job tracking system? ","What is functional annotation? ", "Which assembler should I use if I manage prokaryotic data?", "What is the FTP browser of DeNovoSeq?"]; default_engine = SnipsNLUEngine() #%% engine = SnipsNLUEngine(config=CONFIG_EN) with io.open("/Users/ayaali/Documents/Geni_chatbot/Genei/dataset.json") as fil: dataset = json.load(fil) #%% res = [] with open('/Users/ayaali/Documents/Geni_chatbot/Genei/out_new.txt', 'w') as f: engine.fit(dataset) seed = 42 engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed) engine.fit(dataset) for ques in mylist: parsing = engine.parse(ques)
from __future__ import unicode_literals

from snips_nlu import SnipsNLUEngine, load_resources
from SPARQLWrapper import SPARQLWrapper, JSON
import sys
import io
import json
import snips_nlu

resultado = []

snips_nlu.load_resources("es")

reload(sys)
sys.setdefaultencoding('utf8')

# Read the training file to identify the intent
with io.open("dataset.json") as f:
    engine_dict = json.load(f)

engine = SnipsNLUEngine()
engine.fit(engine_dict)


# Method to get the details of the question asked
def pregunta(frase):
    r = engine.parse(unicode(frase))
    return json.dumps(r, indent=2)


# Connection to VIRTUOSO
sparql = SPARQLWrapper("http://localhost:8890/sparql/plantas1")


# Methods for querying VIRTUOSO
def tipos(entidad):
import io
import json
import sys

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN
from SimpleWebSocketServer import SimpleWebSocketServer, WebSocket
import threading

seed = 42
engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed)

server = None
clients = []


class SimpleWSServer(WebSocket):
    def handleConnected(self):
        clients.append(self)

    def handleClose(self):
        clients.remove(self)


def run_server():
    global server
    global stop_thread
    server = SimpleWebSocketServer('', 9000, SimpleWSServer,
                                   selectInterval=(1000.0 / 15) / 1000)
def train_eval_snips_nlu_model(lang='en', cross=False, save=''):
    """
    Train snips data from all brat annotation objects
    :param lang: abbreviate language name
    :param save: path where the model will be saved
    :return: None
    :rtype: None
    """
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN
    from snips_nlu_metrics import compute_train_test_metrics, compute_cross_val_metrics
    import pickle
    import json

    if cross:
        train_data_obj = BuildSnipsDataTask2(lang,
                                             cross=cross,
                                             vers=save,
                                             add_entities=True)
        train_data = train_data_obj.build_snips_data_task2()
        print("--> Evaluating training data with Snips metrics...")
        filename_results = source_result / "snips_semeval_2020_evaluation_task2_{}.pkl".format(
            save)
        if not Path(filename_results).exists():
            tt_metrics = compute_train_test_metrics(
                train_dataset=train_data[0],
                test_dataset=train_data[1],
                engine_class=SnipsNLUEngine,
                include_slot_metrics=True)
            # print(tt_metrics)
            if not Path(filename_results).exists():
                with codecs.open(filename_results, 'wb') as metric:
                    pickle.dump(tt_metrics, metric)

                from datetime import datetime
                dmtime = "_{}_{}".format(
                    save, datetime.now().strftime("%Y%m%d-%H%M%S"))
                name = "snips_semeval_2020_evaluation_task2{}.json".format(
                    dmtime)
                filename_results_json = source_result / name
                with codecs.open(filename_results_json, 'w',
                                 "utf-8") as m_json:
                    json.dump(tt_metrics, m_json)
    else:
        filename_results = source_result / "snips_semeval_2020_model_task2_{}".format(
            save)
        train_data_obj = BuildSnipsDataTask2(lang,
                                             cross=cross,
                                             vers=save,
                                             add_entities=True)
        train_data = train_data_obj.build_snips_data_task2()
        # print(CONFIG_EN)
        nlu_engine = SnipsNLUEngine(CONFIG_EN)
        print("--> Training patent data with Snips...")
        nlu_engine.fit(train_data)
        """
        try:
            print("--> Saving model trained with Snips (JOBLIB)...")
            filename_joblib = source_result / "snips_semeval_2020_model_task2_{}.pkl".format(save)
            with codecs.open(filename_joblib, 'wb') as metric:
                pickle.dump(nlu_engine, metric)
        except:
            pass
        """
        print("--> Saving model trained with Snips (SNIPS)...")
        try:
            nlu_engine.persist(filename_results)
        except Exception as e:
            print("Error saving the model....{}".format(str(e)))