Example #1
    def test_main_train_engine(self):
        # Given
        args = [BEVERAGE_DATASET_PATH, self.tmp_file_path]
        with patch.object(sys, "argv", mk_sys_argv(args)):
            # When
            main_train_engine()

            # Then
            if not os.path.exists(self.tmp_file_path):
                self.fail("No trained engine generated")
            msg = "Failed to create an engine from engine dict."
            with self.fail_if_exception(msg):
                with io.open(self.tmp_file_path, "r", encoding="utf8") as f:
                    trained_engine_dict = json.load(f)
                SnipsNLUEngine.from_dict(trained_engine_dict)
Example #2
    def test_sample_configs_should_work(self):
        # Given
        dataset = self.sample_dataset

        for language in get_all_languages():
            # When
            config_file = "config_%s.json" % language
            config_path = os.path.join(SAMPLES_PATH, "configs", config_file)
            with io.open(config_path) as f:
                config = json.load(f)
            dataset[LANGUAGE] = language
            engine = SnipsNLUEngine(config).fit(dataset)
            result = engine.parse("Please give me the weather in Paris")

            # Then
            intent_name = result[RES_INTENT][RES_INTENT_NAME]
            self.assertEqual("sampleGetWeather", intent_name)
Example #3
def debug_training(dataset_path, config_path=None):
    with io.open(os.path.abspath(dataset_path), "r", encoding="utf8") as f:
        dataset = json.load(f)

    load_resources(dataset["language"])

    if config_path is None:
        config = NLUEngineConfig()
    else:
        with io.open(config_path, "r", encoding="utf8") as f:
            config = NLUEngineConfig.from_dict(json.load(f))

    engine = SnipsNLUEngine(config).fit(dataset)

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if isinstance(query, bytes):
            query = query.decode("utf8")
        if query == "q":
            break
        print(json.dumps(engine.parse(query), indent=2))
Example #4
def debug_inference(engine_path):
    with io.open(os.path.abspath(engine_path), "r", encoding="utf8") as f:
        engine_dict = json.load(f)

    load_resources(engine_dict["dataset_metadata"]["language_code"])
    engine = SnipsNLUEngine.from_dict(engine_dict)

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if isinstance(query, bytes):
            query = query.decode("utf8")
        if query == "q":
            break
        print(json.dumps(engine.parse(query), indent=2))
Example #5
def main_train_engine():
    args = vars(parse_train_args(sys.argv[1:]))

    dataset_path = args.pop("dataset_path")
    with io.open(dataset_path, "r", encoding="utf8") as f:
        dataset = json.load(f)

    if args.get("config_path") is not None:
        config_path = args.pop("config_path")
        with io.open(config_path, "r", encoding="utf8") as f:
            config = json.load(f)
    else:
        config = NLUEngineConfig()

    load_resources(dataset["language"])
    engine = SnipsNLUEngine(config).fit(dataset)
    print("Create and train the engine...")

    output_path = args.pop("output_path")
    serialized_engine = bytes(json.dumps(engine.to_dict()), encoding="utf8")
    with io.open(output_path, "w", encoding="utf8") as f:
        f.write(serialized_engine.decode("utf8"))
    print("Saved the trained engine to %s" % output_path)
Example #6
def main_engine_inference():
    args = vars(parse_inference_args(sys.argv[1:]))

    training_path = args.pop("training_path")
    with io.open(os.path.abspath(training_path), "r", encoding="utf8") as f:
        engine_dict = json.load(f)
    engine = SnipsNLUEngine.from_dict(engine_dict)
    language = engine._dataset_metadata[  # pylint: disable=protected-access
        "language_code"]
    load_resources(language)

    while True:
        query = input("Enter a query (type 'q' to quit): ").strip()
        if isinstance(query, bytes):
            query = query.decode("utf8")
        if query == "q":
            break
        print(json.dumps(engine.parse(query), indent=2))
Example #7
# Download the Spanish language resources
# You must run this with administrator privileges
import subprocess
subprocess.run(["python", "-m", "snips_nlu", "download", "es"])

#----
import io
import json

# Load the training dataset
with io.open("trainingDatasets/multiple.json") as f:
    sample_dataset = json.load(f)

#Train
from snips_nlu import SnipsNLUEngine
nlu_engine = SnipsNLUEngine()
print(sample_dataset)
nlu_engine.fit(sample_dataset)

#Test
import json

parsing = nlu_engine.parse(
    "Creame una máquina virtual con seis cores y 3 gigas de ram en Europa")
print(json.dumps(parsing, indent=2))
print("---------------------")

parsing = nlu_engine.parse("Dame de alta un data lake en Asia")
print(json.dumps(parsing, indent=2))
Example #8
import io
import json
from flask import Flask, jsonify, request
from flask_pymongo import PyMongo
from snips_nlu import SnipsNLUEngine, load_resources
from odo import odo
from flask_cors import CORS
from collections import defaultdict
import datetime

app = Flask(__name__)
CORS(app)
app.config['MONGO_DBNAME'] = 'apiwatch'
app.config['MONGO_URI'] = 'mongodb://51.38.49.225:27017/apiwatch'
mongo = PyMongo(app)

with io.open("./datajson.json") as f:
    sample_dataset = json.load(f)

load_resources("fr")
nlu_engine = SnipsNLUEngine()
nlu_engine.fit(sample_dataset)


@app.route('/star', methods=['GET'])
def get_all_stars():
    star = mongo.db.apiwatch
    output = []
    for s in star.find():
        output.append({'texte': s['texte']})
    return jsonify({'result': output})


@app.route('/star', methods=['POST'])
def add_star():
    texte = request.json['texte']
Example #9
    def nlu(self):
        global snips_engines
        # try to load nlu for given language
        try:
            if (self.lang in snips_engines.keys()):
                nlu_engine = snips_engines[self.lang]
            else:
                print("*** " + self.lang + " ***")
                snips_engines[self.lang] = SnipsNLUEngine.from_path(
                    "models/" + str(self.lang))

            nlu_engine = snips_engines[self.lang]
            self.nlu_parsing = nlu_engine.parse(self.text)
            if (self.probability >= float(
                    self.nlu_parsing["intent"]["probability"])):
                neural = self.nn(str(self.lang), self.text)
                if (neural == False and neural != None):
                    self.nlu_parsing["intent"]["probability"] = float(0)
                elif (neural == True and neural != None):
                    self.nlu_parsing["intent"]["probability"] = float(0.9)
        except Exception as e:
            print(e)
            # use lang detect from translator, try to load nlu again
            self.translate(src=True)

            try:
                if (self.lang in snips_engines.keys()):
                    nlu_engine = snips_engines[self.lang]
                else:
                    snips_engines[self.lang] = SnipsNLUEngine.from_path(
                        "models/" + str(self.lang))

                nlu_engine = snips_engines[self.lang]
                self.nlu_parsing = nlu_engine.parse(self.text)
                if (self.probability >= float(
                        self.nlu_parsing["intent"]["probability"])):
                    neural = self.nn(str(self.lang), self.text)
                    if (neural == False and neural != None):
                        self.nlu_parsing["intent"]["probability"] = float(0)
                    elif (neural == True and neural != None):
                        self.nlu_parsing["intent"]["probability"] = float(0.9)
            except Exception as e:
                print(e)

                # load default nlu, for the case that langdetect failed
                if self.mainlang in snips_engines:
                    nlu_engine = snips_engines[self.mainlang]
                else:
                    print("*** " + self.mainlang + " ***")
                    snips_engines[self.mainlang] = SnipsNLUEngine.from_path(
                        "models/" + self.mainlang)

                nlu_engine = snips_engines[self.mainlang]

                self.nlu_parsing = nlu_engine.parse(self.translated)

                if (self.probability >= float(
                        self.nlu_parsing["intent"]["probability"])):
                    neural = self.nn(str(self.lang), self.translated)
                    if (neural == False and neural != None):
                        self.nlu_parsing["intent"]["probability"] = float(0)
                    elif (neural == True and neural != None):
                        self.nlu_parsing["intent"]["probability"] = float(0.9)

        self.nlu_parsing["lang"] = self.lang

        try:
            slots = self.nlu_parsing["slots"]
            for x in slots:
                self.nlu_parsing[x["slotName"]] = x["value"]["value"]

        except Exception as e:
            print(e)

        print(self.nlu_parsing)
Example #10
from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN
from snips_nlu_parsers import BuiltinEntityParser
import io
import json
from twilio.twiml.messaging_response import MessagingResponse
from twilio.rest import Client
import sqlite3
from pprint import pprint
from jsonmerge import merge
import os
from datetime import date
account_sid = 'ACdba21fe9a93646530323131c9062a9d6'
auth_token = '60bc05d98fa863102d8f9bb40ed3cb7a'
client = Client(account_sid, auth_token)

default_engine = SnipsNLUEngine()

list_doc = [
    "doctor rekha pradeep", "dr rekha pradeep", "dr arvind shenoi"
    "doctor arvind shenoi", "doctor ashwin", "dr ashwin", "doctor ajay",
    "dr ajay", "doctor urvashi", "dr urvashi", "doctor kanjus", "dr kanjus",
    "doctor akshay", "dr akshay", "doctor sahu", "dr sahu"
]


def init_snipsnlu():
    engine = SnipsNLUEngine(config=CONFIG_EN)
    with io.open("proj_new.json") as f:
        dataset = json.load(f)
        engine.fit(dataset)
    return engine
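A minimal usage sketch for the helper above (the query text is hypothetical; it assumes the init_snipsnlu() function and the json import from this example, and that proj_new.json covers the doctors listed in list_doc):

engine = init_snipsnlu()
# Parse an illustrative appointment request with the freshly trained engine
parsing = engine.parse("book an appointment with doctor rekha pradeep")
print(json.dumps(parsing, indent=2))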
Example #11
from rest_framework.response import Response
from rest_framework.views import APIView
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.dataset import Dataset
from snips_nlu.default_configs import CONFIG_EN
from pprint import pprint

# Create your views here.

dataset = Dataset.from_yaml_files("en", ["data.yaml"])
j = dataset.json
m = json.dumps(j)
sample_dataset = json.loads(m)

nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
nlu_engine = nlu_engine.fit(sample_dataset, force_retrain=False)


class MakeYAMLFromDB(APIView):
    def get(self, request):
        global nlu_engine
        d = request.data
        el = []
        entities = list(Entity.objects.all().values(
            'name', 'automatically_extensible', 'use_synonyms',
            'matching_strictness'))
        for e in entities:
            jm = e
            jm['type'] = 'entity'
            lll = list(
Example #12
import io
import json
from snips_nlu import SnipsNLUEngine, load_resources

load_resources("es")

with io.open("trained.json") as f:
    engine_dict = json.load(f)

engine = SnipsNLUEngine.from_dict(engine_dict)

phrase = input("Pregunta: ")

r = engine.parse(phrase)
print(json.dumps(r, indent=2))
Example #13
import io
import json
import requests

from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.default_configs import CONFIG_FR

with io.open("dataset.json") as f:
    sample_dataset = json.load(f)

load_resources("fr")  #indinque la langue
nlu_engine = SnipsNLUEngine(
    config=CONFIG_FR)  #configuration supplémentaire pour la langue
nlu_engine.fit(sample_dataset)

api_address = 'http://api.openweathermap.org/data/2.5/forecast?appid=00bd3724beb92a272a9c8fcb32a99867&q='
while (True):
    x = input("tapez votre question sur le temps? ")
    #parsing = engine.parse(u""+x)
    #res = json.dumps(parsing, indent=2)
    #city=extract city name

    print(x)
Example #14
class BrewSnips:
    def __init__(self, json_arg):
        self.nlu_engine = None
        self.json_arg = json_arg

    def simple_text_cleaner(self, query_text):
        """
        Replace, encode, remove unnecessary texts to ensure alignment w/ yaml format.

        :param query_text:  ( String ) Query
        :return:            ( String ) Cleaned-format query
        """

        # Encode into "utf-8" format
        query_text = query_text.encode(encoding='utf-8').decode("utf-8")
        # Remove "$" and replace "%" and "+" with words
        query_text = query_text.replace("$", "")
        query_text = query_text.replace("%", "percent")
        query_text = query_text.replace("+", "plus")
        # Remove punctuations
        query_text = query_text.translate(
            str.maketrans('', '', string.punctuation))
        # Replace "’" to avoid parsing error
        query_text = query_text.replace("’", "")

        return query_text

    def parse_snips_intent(self):
        """
        Parse original data.json into Snips NLU Engine Training Data in yaml format.
        Convert into yaml file through command line prompt :
        'snips-nlu generate-dataset en input-yaml-file > output-json-file'
        """

        # Get original data.json in DataFrame
        data_df = DataProcessing(
            f"{getcwd()}/data_lake/{self.json_arg}").retrieve_process_json()
        # Get list of Unique Intents
        intent_list = list(set(data_df["Intent"]))
        # Load SpaCy NLP Large Corpus
        spacy_nlp_engine = load('en_core_web_lg')
        # Init yaml object
        yaml = ruamel.yaml.YAML()
        # Set explicit start to True
        yaml.explicit_start = True
        # Parse by Intents
        for intent_name in intent_list:
            # yes and no are reserved values for yaml file.
            # To avoid parsing error, "_" is added before the intent name.
            if intent_name == "yes" or intent_name == "no":
                intent_dict = {"type": "intent", "name": f"{intent_name}s"}
            else:
                intent_dict = {"type": "intent", "name": intent_name}
            # Init Lists for Slots + Utterances
            slots_value_list = []
            utt_value_list = []
            # Subset current Intent Data
            subset_data = data_df[data_df["Intent"] ==
                                  intent_name].reset_index(drop=True)
            # Get current Intent Queries
            intent_query_words = list(subset_data["Query"])
            # Get the 4 grams and convert into a list
            word_ngrams = (pd.Series(ngrams(intent_query_words, 4))).to_list()
            # Random sample 80% of each Intent as training phrases for NLU Engine
            sample_ngrams = sample(word_ngrams, int(len(subset_data) * 0.8))
            # Start parsing each query
            for phrases in sample_ngrams:
                # Join phrases back to one single sentence
                full_text = " ".join(phrases)
                # Parse Entity of the text through Spacy NLP Engine
                parse_phrases = spacy_nlp_engine(full_text)
                # Set slots
                if len(parse_phrases.ents) > 0:
                    # Get Entity Label and Text, if any
                    for nlp_entity in parse_phrases.ents:
                        entity_label = nlp_entity.label_
                        entity_text = nlp_entity.text
                        # Construct "slot" for name and entity
                        slot_entities = {
                            "name": entity_label,
                            "entity": entity_label
                        }
                        # Replace text with entity label
                        full_text = full_text.replace(
                            entity_text, f"[{entity_label}]({entity_text})")
                        # Store "utterances" from the ngram
                        utt_value_list.append(full_text)
                        # Store unique "slots"
                        if slot_entities not in slots_value_list:
                            slots_value_list.append(slot_entities)
            # Set slots in intent dictionary
            if len(slots_value_list) > 0:
                intent_dict["slots"] = slots_value_list
            # Set utterances in intent dictionary
            if len(utt_value_list) > 0:
                intent_dict["utterances"] = utt_value_list
            # If no utterances were found, use the original ngrams
            else:
                intent_dict["utterances"] = [
                    " ".join(gram) for gram in sample_ngrams
                ]
            # Append into output yaml
            with open(f"{getcwd()}/data_lake/intent_ngram.yaml", "a") as file:
                yaml.dump(intent_dict, file)

    def get_nlu_engine(self):
        """
        Get JSON file with our intents and entities tag from part of our input data.

        Update the Snips NLU Engine.

        :return:    ( Snips NLU Object ) Customized Snips NLU Engine
        """

        # Get parsed intent ngram JSON file
        with io.open(f"{getcwd()}/data_lake/intent_ngram.json") as f:
            custom_dataset = json.load(f)
        # Init Snips NLU Engine
        self.nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
        # Fit sample data into NLU Engine for customization
        self.nlu_engine = self.nlu_engine.fit(custom_dataset)

        return self.nlu_engine

    def parse_intent_name_prob(self, text):
        """
        Parse New IntentName and Probability based on customized Snips NLU Engine.

        :param text:    ( String ) Query
        :return:        ( List ) Intent Name and Probability Score
        """

        # Parse queries to determine intent name and probability score
        parsing = self.nlu_engine.parse(text)
        # Get intent name
        intent_name = parsing["intent"]["intentName"]
        # get the probability
        intent_prob = parsing["intent"]["probability"]

        return [intent_name, intent_prob]

    def brew_intent_score(self):
        """
        Generate new intent name and probability score for each query.
        Parse them into JSON format and write into a JSON file.
        """

        # Get Customized Snips NLU Engine
        self.get_nlu_engine()
        # Get the original json data
        with io.open(f"{getcwd()}/data_lake/{self.json_arg}") as f:
            data_df = json.load(f)
            # Convert list of lists into DataFrame w/ relative columns
            data_content_df = pd.DataFrame(data_df,
                                           columns=["Query", "Intent"])
            # Set Intent Similarity Score w/ New Intent
            data_content_df["NLU_Intent_Score"] = data_content_df[
                "Query"].apply(self.parse_intent_name_prob)
            # Split into individual columns : Intent and Score
            data_content_df[["NLU_Intent", "NLU_Score"]] = pd.DataFrame(
                data_content_df["NLU_Intent_Score"].tolist(),
                index=data_content_df.index)
            # Drop unused column
            data_content_df.drop("NLU_Intent_Score", axis=1, inplace=True)
            # Store all output into feather file
            write_feather(data_content_df,
                          f"{getcwd()}/data_lake/SnipsNLUData.feather")
Example #15
import io
import json
from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

# snips-nlu generate-dataset en dataset.yaml > dataset.json

engine = SnipsNLUEngine(config=CONFIG_EN)
with io.open('dataset.json') as f:
    dataset = json.load(f)
engine.fit(dataset)

parsing = engine.parse('Please give me some lights in the entrance !')
print(json.dumps(parsing, indent=2))
Example #16
from __future__ import unicode_literals, print_function

import io
import json

from snips_nlu import SnipsNLUEngine, load_resources

with io.open("sample_dataset.json") as f:
    sample_dataset = json.load(f)

with io.open("configs/config_en.json") as f:
    config = json.load(f)

load_resources(sample_dataset["language"])
nlu_engine = SnipsNLUEngine(config=config)
nlu_engine.fit(sample_dataset)

text = "What will be the weather in San Francisco next week?"
parsing = nlu_engine.parse(text)
print(json.dumps(parsing, indent=2))
Example #17
import io
import json
from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.default_configs import CONFIG_EN
load_resources("en")
engine = SnipsNLUEngine(config=CONFIG_EN)
default_engine = SnipsNLUEngine()
Example #18
    def fit(self, dataset):
        self.engine = SnipsNLUEngine(self.config).fit(dataset)
        return self
Example #19
def label_data_with_rasa_nlu_model(lang='en', save="", out='practice'):
    """ Label counterfactual training data 

    :param lang: abbreviate language name of model
    :param save: path name where model is saved
    :return: csv file
    :rtype: file
    """
    from rasa.nlu.model import Interpreter
    from rasa.nlu.components import ComponentBuilder
    from zipfile import ZipFile
    from snips_nlu import SnipsNLUEngine
    import pickle
    import json

    model = model_path / "rasa_semeval_2020_model_task2_{}".format(save)
    if Path(model).exists():
        print("\n--> Loading Rasa model 1...")
        model1 = str(model / "nlu_20200515-042204")
        nlu_engine, nlu_engine2 = "", ""
        with codecs.open(model_path / "builder_task2_{}.pkl".format(save),
                         "rb") as ant:
            builder = pickle.load(ant)
            nlu_engine = Interpreter.load(model1, builder)
            print("\n--> Loading Snips model 2...")
            #model2 = str(model / "nlu_20200515-185057")
            #nlu_engine2 = Interpreter.load(model2, builder)
            print("\n--> Loading Snips model 3...")
            model3 = str(model_path / "snips_semeval_2020_model_task2_ct_v5")
            nlu_engine3 = SnipsNLUEngine.from_path(model3)
            print("\n--> Loading Snips model 4...")
            model4 = model_path / "snips_semeval_2020_model_task2_ct_v6"
            nlu_engine4 = SnipsNLUEngine.from_path(model4)
            print("\n--> Loading Snips model 5...")
            model5 = model_path / "snips_semeval_2020_model_task2_ct_v7"
            nlu_engine5 = SnipsNLUEngine.from_path(model5)
            print("\n--> Loading Snips model 6...")
            model6 = str(model / "nlu_20200513-075312_desc")
            nlu_engine6 = Interpreter.load(model6, builder)
            #

        if out == 'evaluate':
            print("--> [EVALUATION] Start labeling with Rasa model...")
            pd_data = pandas.read_csv(test_task_2)
            id_sent, pred = [], []
            count = 0
            for i, row in pd_data.iterrows():
                sentence = row['sentence']
                predict = nlu_engine.parse(sentence, time=3)
                antecedent, consequent = [], []
                #print("-- entities: ", predict['entities'])
                for slot in predict['entities']:
                    if slot['entity'] == "consequent":
                        consequent = [slot['start'], slot['end']]
                    if slot['entity'] == "antecedent":
                        antecedent = [slot['start'], slot['end']]

                if len(antecedent) == 0 and len(consequent) == 0:
                    """
                    predict2 = nlu_engine2.parse(sentence)
                    antecedent, consequent = [], []
                    for slot in predict['entities']:
                        if slot['entity'] == "consequent":
                            consequent = [slot['start'], slot['end']]
                        if slot['entity'] == "antecedent":
                            antecedent = [slot['start'], slot['end']] 
                    """
                    if len(predict['entities']) == 0:
                        predict3 = nlu_engine3.parse(sentence)
                        antecedent, consequent = [], []
                        for slot in predict3['slots']:
                            if slot['slotName'] == "Consequent":
                                end_id = slot['range']['end']
                                start_id = slot['range']['start']
                                #if end_id != -1: end_id = end_id - 1
                                consequent = [start_id, end_id]
                            if slot['slotName'] == "Antecedent":
                                end_id = slot['range']['end']
                                start_id = slot['range']['start']
                                #if end_id != -1: end_id = end_id - 1
                                antecedent = [start_id, end_id]

                        if len(predict3['slots']) == 0:
                            predict4 = nlu_engine4.parse(sentence)
                            antecedent, consequent = [], []
                            for slot in predict4['slots']:
                                if slot['slotName'] == "Consequent":
                                    end_id = slot['range']['end']
                                    start_id = slot['range']['start']
                                    #if end_id != -1: end_id = end_id - 1
                                    consequent = [start_id, end_id]
                                if slot['slotName'] == "Antecedent":
                                    end_id = slot['range']['end']
                                    start_id = slot['range']['start']
                                    #if end_id != -1: end_id = end_id - 1
                                    antecedent = [start_id, end_id]

                            if len(predict4['slots']) == 0:
                                predict5 = nlu_engine5.parse(sentence)
                                antecedent, consequent = [], []
                                for slot in predict5['slots']:
                                    if slot['slotName'] == "Consequent":
                                        end_id = slot['range']['end']
                                        start_id = slot['range']['start']
                                        #if end_id != -1: end_id = end_id - 1
                                        consequent = [start_id, end_id]
                                    if slot['slotName'] == "Antecedent":
                                        end_id = slot['range']['end']
                                        start_id = slot['range']['start']
                                        #if end_id != -1: end_id = end_id - 1
                                        antecedent = [start_id, end_id]

                                if len(predict5['slots']) == 0:
                                    predict6 = nlu_engine6.parse(sentence)
                                    antecedent, consequent = [], []
                                    for slot in predict6['entities']:
                                        if slot['entity'] == "consequent":
                                            consequent = [
                                                slot['start'], slot['end']
                                            ]
                                        if slot['entity'] == "antecedent":
                                            antecedent = [
                                                slot['start'], slot['end']
                                            ]

                                    if len(predict6['entities']) == 0:
                                        count += 1
                                        print("count: ", count)
                                        print(predict4)

                else:
                    print("-------- ok ! ")

                if len(antecedent) == 0: antecedent = [-1, -1]
                if len(consequent) == 0: consequent = [-1, -1]

                sent_id = row['sentenceID']
                eval_out = (sent_id, antecedent[0], antecedent[1],
                            consequent[0], consequent[1])
                pred.append(eval_out)
                # print(eval_out)

            print("no treated: ", count)
            # antecedent_endid
            results = pandas.DataFrame(data=pred,
                                       columns=[
                                           "sentenceID", "antecedent_startid",
                                           "antecedent_endid",
                                           "consequent_startid",
                                           "consequent_endid"
                                       ])
            model_saved = model_path / \
            "rasa_semeval_2020_evaluation_task2_final_{}.csv".format(save)
            results.to_csv(model_saved, index=False)

            from datetime import datetime
            dtime = datetime.now().strftime("%Y%m%d-%H%M%S")
            results_name = "rasa_semeval_2020_evaluation_{}_{}.zip".format(
                save, dtime)

            results.to_csv(model_saved, index=False)
            with ZipFile(model_path / results_name, 'w') as myzip:
                myzip.write(str(model_saved), "subtask2.csv")

        elif out == 'practice':
            print("--> [PRACTICE] Start labeling with Rasa model...")
            test_task_prac_1 = source / "data/task2-train.csv"
            pd_data = pandas.read_csv(test_task_prac_1)
            id_sent, pred = [], []
            for i, row in pd_data.iterrows():
                sentence = row['sentence']
                predict = nlu_engine.parse(sentence)
                antecedent, consequent = [], []
                for slot in predict['slots']:
                    if slot['slotName'] == "consequent":
                        end_id = slot['range']['end']
                        if end_id != -1: end_id = end_id - 1
                        consequent = [slot['range']['start'], end_id]
                    if slot['slotName'] == "antecedent":
                        end_id = slot['range']['end']
                        if end_id != -1: end_id = end_id - 1
                        antecedent = [slot['range']['start'], end_id]
                if len(antecedent) == 0: antecedent = [-1, -1]
                if len(consequent) == 0: consequent = [-1, -1]
                sent_id = row['sentenceID']
                eval_out = (sent_id, antecedent[0], antecedent[1],
                            consequent[0], consequent[1])
                pred.append(eval_out)
                print(eval_out, row['antecedent_startid'],
                      row['antecedent_endid'])

            # antecedent_endid
            results = pandas.DataFrame(data=pred,
                                       columns=[
                                           "sentenceID", "antecedent_startid",
                                           "antecedent_endid",
                                           "consequent_startid",
                                           "consequent_endid"
                                       ])

            results_name = "rasa_semeval_2020_evaluation_practice_{}.zip".format(
                save)
            model_saved = model_path / "subtask2.csv"

            results.to_csv(model_saved, index=False)
            with ZipFile(model_path / results_name, 'w') as myzip:
                myzip.write("subtask2.csv", model_saved.open())
Example #20
def label_data_with_snips_nlu_model(lang='en', save="", out='practice'):
    """ Label counterfactual training data 

    :param lang: abbreviate language name of model
    :param save: path name where model is saved
    :return: csv file
    :rtype: file
    """
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN
    from snips_nlu_metrics import compute_train_test_metrics, compute_cross_val_metrics
    import pickle
    import json

    model = model_path / "snips_semeval_2020_model_task2_{}".format(save)
    if Path(model).exists():
        print("\n--> Loading Snips model...")
        nlu_engine = SnipsNLUEngine.from_path(model)

        if out == 'evaluate':
            print("--> [EVALUATION] Start labeling with Snips model...")
            pd_data = pandas.read_csv(test_task_2)
            id_sent, pred = [], []
            for i, row in pd_data.iterrows():
                sentence = row['sentence']
                predict = nlu_engine.parse(sentence)
                antecedent, consequent = [], []
                for slot in predict['slots']:
                    if slot['slotName'] == "Consequent":
                        end_id = slot['range']['end']
                        start_id = slot['range']['start']
                        #if end_id != -1: end_id = end_id - 1
                        consequent = [start_id, end_id]
                    if slot['slotName'] == "Antecedent":
                        end_id = slot['range']['end']
                        start_id = slot['range']['start']
                        #if end_id != -1: end_id = end_id - 1
                        antecedent = [start_id, end_id]
                if len(antecedent) == 0: antecedent = [-1, -1]
                if len(consequent) == 0: consequent = [-1, -1]
                sent_id = row['sentenceID']
                eval_out = (sent_id, antecedent[0], antecedent[1],
                            consequent[0], consequent[1])
                pred.append(eval_out)
                #print(predict)
                #print(predict['input'][antecedent[0]:antecedent[1]])
                print(eval_out)

            # antecedent_endid
            results = pandas.DataFrame(data=pred,
                                       columns=[
                                           "sentenceID", "antecedent_startid",
                                           "antecedent_endid",
                                           "consequent_startid",
                                           "consequent_endid"
                                       ])
            model_saved = model_path / \
            "snips_semeval_2020_evaluation_final_{}.csv".format(save)
            results.to_csv(model_saved, index=False)

            from datetime import datetime
            from zipfile import ZipFile
            dtime = datetime.now().strftime("%Y%m%d-%H%M%S")
            results_name = "snips_semeval_2020_evaluation_task2_{}_{}.zip".format(
                save, dtime)

            results.to_csv(model_saved, index=False)
            with ZipFile(model_path / results_name, 'w') as myzip:
                myzip.write(str(model_saved), "subtask2.csv")

            print(
                "--> [EVALUATION] End labeling and saving with Snips model...")

        elif out == 'practice':
            print("--> [PRACTICE] Start labeling with Snips model...")
            test_task_prac_1 = source / "data/task2-train.csv"
            pd_data = pandas.read_csv(test_task_prac_1)
            id_sent, pred = [], []
            for i, row in pd_data.iterrows():
                sentence = row['sentence']
                predict = nlu_engine.parse(sentence)
                antecedent, consequent = [], []
                for slot in predict['slots']:
                    if slot['slotName'] == "Consequent":
                        end_id = slot['range']['end']
                        if end_id != -1: end_id = end_id - 1
                        consequent = [slot['range']['start'], end_id]
                    if slot['slotName'] == "Antecedent":
                        end_id = slot['range']['end']
                        if end_id != -1: end_id = end_id - 1
                        antecedent = [slot['range']['start'], end_id]
                if len(antecedent) == 0: antecedent = [-1, -1]
                if len(consequent) == 0: consequent = [-1, -1]
                sent_id = row['sentenceID']
                eval_out = (sent_id, antecedent[0], antecedent[1],
                            consequent[0], consequent[1])
                pred.append(eval_out)
                print(eval_out, row['antecedent_startid'],
                      row['antecedent_endid'])

            # antecedent_endid
            results = pandas.DataFrame(data=pred,
                                       columns=[
                                           "sentenceID", "antecedent_startid",
                                           "antecedent_endid",
                                           "consequent_startid",
                                           "consequent_endid"
                                       ])

            model_saved = model_path / \
                "snips_semeval_2020_evaluation_practice_{}.csv".format(save)
            results.to_csv(model_saved, index=False)

            print(
                "--> [PRACTICE] End labeling and saving with Snips model...")
Example #21
import io
import json
from snips_nlu import SnipsNLUEngine

nlu_engine = SnipsNLUEngine()

with open("./chatbot/data/input.json") as input_file:
    input_dict = json.load(input_file)

input = input_dict['input']

with io.open("./chatbot/data/dataset.json") as f:
    sample_dataset = json.load(f)

nlu_engine.fit(sample_dataset)
parsing = nlu_engine.parse(u"%s" % input)

with open("./chatbot/data/output.json", "w") as output_file:
    output_file.write(json.dumps(parsing, indent=2))
Example #22
from __future__ import unicode_literals, print_function

import json
from pathlib import Path

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

SAMPLE_DATASET_PATH = Path(__file__).parent / "sample_dataset.json"

with SAMPLE_DATASET_PATH.open(encoding="utf8") as f:
    sample_dataset = json.load(f)

nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
nlu_engine.fit(sample_dataset)

text = "What will be the weather in San Francisco next week?"
parsing = nlu_engine.parse(text)
print(json.dumps(parsing, indent=2))
Example #23
from snips_nlu import SnipsNLUEngine
import json

engine = SnipsNLUEngine.from_path("demo")
parsing = engine.parse("Turn lights on in the bathroom please")
print("parsing...")
print(json.dumps(parsing, indent=2))
Example #24
from flask import Flask, request
from snips_nlu import SnipsNLUEngine

app = Flask(__name__)

engine = SnipsNLUEngine.from_path('/nlu_engine/')


@app.route('/query', methods=['POST'])
def parse_query():
    query = request.form['query']
    intent = engine.parse(query)
    return intent


app.run(host='0.0.0.0')
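A minimal client sketch for the service above (it assumes the Flask app is running locally on its default port 5000; the URL and query text are illustrative):

import requests

# request.form['query'] expects form-encoded data, so the query is passed via `data=`
response = requests.post(
    "http://localhost:5000/query",
    data={"query": "Turn lights on in the bathroom please"},
)
print(response.json())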
Example #25
class SnipsNLUFramework(NLUFramework):
    # pylint: disable=arguments-differ
    async def construct(  # type: ignore
            self,
            global_config: GlobalConfig,
            skip_language_installations: bool = False) -> None:
        """
        Args:
            global_config: Global configuration for the whole test framework.
            skip_language_installations: A boolean indicating whether to skip the installation of
                required language resources. Defaults to False.
        """

        self.__python = global_config.python
        self.__skip_language_installations = skip_language_installations

    # pylint: disable=attribute-defined-outside-init
    async def prepareDataSet(self, data_set: NLUDataSet) -> None:
        last_exception = None

        # Try all language tag derivations, from specific to broad
        for language in Language.get(
                data_set.language).simplify_script().broaden():
            language = language.to_tag()
            try:
                if not self.__skip_language_installations:
                    self._logger.info(
                        "Installing language resources for \"%s\"...",
                        language)

                    subprocess.run([
                        self.__python, "-m", "snips_nlu", "download", language
                    ],
                                   check=True)

                self.__language = language

                last_exception = None
                break
            except BaseException as e:  # pylint: disable=broad-except
                last_exception = e

        if last_exception is not None:
            raise last_exception

    async def unprepareDataSet(self) -> None:
        del self.__language

    # pylint: disable=attribute-defined-outside-init
    async def train(self, training_data: List[NLUDataEntry]) -> None:
        self.__engine = SnipsNLUEngine(DEFAULT_CONFIGS[self.__language])

        intents = {}

        for intent in {x.intent for x in training_data}:
            utterances = []

            # Lambda not correctly supported in mypy
            for entry in filter(lambda x, i=intent: x.intent == i,
                                training_data):  # type: ignore
                utterances.append({"data": [{"text": entry.sentence}]})

            intents[intent] = {"utterances": utterances}

        self.__engine.fit({
            "language": self.__language,
            "intents": intents,
            "entities": {}
        })

    async def rateIntents(self, sentence: str) -> NLUIntentRating:
        intents = self.__engine.get_intents(sentence)

        return NLUIntentRating(sentence, [(x["intentName"], x["probability"])
                                          for x in intents])

    async def cleanupTraining(self) -> None:
        del self.__engine
Example #26
import sys
import io
import os
import shutil
from io import TextIOWrapper, BytesIO
from contextlib import redirect_stdout
import json

from snips_nlu.cli import training
from snips_nlu.cli import generate_dataset
from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

engine = SnipsNLUEngine(config=CONFIG_EN)

def train_model(_input, name, sub=""):
    input = os.path.join("..", "Victor.NLU.Snips", "Datasets", sub, _input)
    if not os.path.exists(input) or not os.path.isfile(input):
        raise FileNotFoundError(f"The file {input} does not exist")

    output = input.replace(".yaml", ".json")
    with TextIOWrapper(io.FileIO(output, "wb"), sys.stdout.encoding) as buf, redirect_stdout(buf):
        generate_dataset.generate_dataset("en", input)
        buf.flush()
        buf.close()
    
    engine_path = os.path.join("..", "Victor.NLU.Snips", "Engines", sub, name)
    if os.path.isdir(engine_path): 
        print("Overwriting existing engine directory {0}.".format(engine_path))
        shutil.rmtree(engine_path)
        
Example #27
"""
This file is responsible for training and saving the SnipsNLU Engine
"""

import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

engine = SnipsNLUEngine(config=CONFIG_EN)

with open("dataset.json") as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("persisted_engine")
Example #28
            "models/" + lang + '_transformer',
            num_labels=numKeys,
            use_cuda=False,
            args={
                'reprocess_input_data': True,
                'overwrite_output_dir': True,
                'num_train_epochs': 15,
                "train_batch_size": 16,
                "eval_batch_size": 16,
                'no_cache': True,
                'use_cached_eval_features': False,
                'save_model_every_epoch': False
            })
        transformer_engines[lang] = model

        snips_engines[lang] = SnipsNLUEngine.from_path("models/" + str(lang))

        print("**loaded " + lang + " ***")

    except Exception as e:
        print(e)

translationList = ["en"]
for lgc in translationList:
    # from de to language
    mname = 'Helsinki-NLP/opus-mt-de-' + lgc
    model = MarianMTModel.from_pretrained(mname)
    tok = MarianSentencePieceTokenizer.from_pretrained(mname)

    translation_engines[mname] = model
    translation_tokenizers[mname] = tok
Example #29
class SnipsInterpreter(Interpreter):
    def __init__(self):
        super(SnipsInterpreter, self).__init__('snips')

        self._meta = None
        self._training_data = None
        self._lang = None
        self._engine = None
        self._entity_parser = None

        self._log.info('Using snips-nlu version %s' % __version__)

    def metadata(self):
        return self._meta

    def lang(self):
        return self._lang

    def training(self):
        result = []

        for intent, data in self._training_data['intents'].items():
            for utterance in data['utterances']:
                result.append({
                    'text':
                    ''.join([u['text'] for u in utterance['data']]),
                    'intent':
                    intent,
                })

        return result

    def fit(self, training_file_path, trained_directory_path):
        filename, _ = os.path.splitext(os.path.basename(training_file_path))

        # TODO check what should be in the base Interpreter class

        trained_path = os.path.join(trained_directory_path,
                                    '%s.trained.json' % filename)
        checksum_path = os.path.join(trained_directory_path,
                                     '%s.checksum' % filename)

        with open(training_file_path) as f:
            training_str = f.read()
            self._training_data = json.loads(training_str)
            self._lang = self._training_data['language']
            self._log.info('Loading resources for language %s' % self._lang)
            load_resources(self._lang)

        same, computed_checksum = self.checksum_match(training_str,
                                                      checksum_path)

        # Checksums match, load the engine from trained file
        if same and os.path.isfile(trained_path):
            self._log.info('Checksum matched, loading trained engine')
            with open(trained_path) as f:
                self._engine = SnipsNLUEngine.from_dict(json.load(f))
        else:
            self._log.info('Checksum has changed, retraining the engine')
            self._engine = SnipsNLUEngine()
            self._engine.fit(self._training_data)

            with open(trained_path, mode='w') as f:
                json.dump(self._engine.to_dict(), f)

            with open(checksum_path, mode='w') as f:
                f.write(computed_checksum)

        self._entity_parser = BuiltinEntityParser(self._lang)
        self._meta = {
            k: list(v.keys())
            for k, v in
            self._engine._dataset_metadata['slot_name_mappings'].items()
        }

    def parse_entity(self, msg, intent, slot):
        entity_label = self._engine._dataset_metadata[
            'slot_name_mappings'].get(intent, {}).get(slot)

        # TODO try to find a way to retrieve multiple slot values, that's a hard one
        # May be we can try matching on _dataset_metadata['entities']

        if entity_label:
            if is_builtin_entity(entity_label):
                parsed = self._entity_parser.parse(msg)

                if parsed:
                    return [parsed[0]['entity']]

        # TODO if slot is not an auto-extensible, use fuzzy matching to match with restricted values

        return super(SnipsInterpreter, self).parse_entity(msg, intent, slot)

    def parse(self, msg):

        # TODO manage multiple intents in the same sentence

        parsed = self._engine.parse(msg)

        if parsed['intent'] is None:
            return []

        slots = {}

        # Constructs a slot dictionary with slot value as a list if multiples matched
        for slot in parsed['slots']:
            name = slot['slotName']
            value = slot['value']

            if name in slots:
                slots[name].append(value)
            else:
                slots[name] = [value]

        return [{
            'text': msg,
            'intent': parsed['intent']['intentName'],
            'slots': slots,
        }]
Example #30
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN  #For German please use ..._DE

engine = SnipsNLUEngine(config=CONFIG_EN)

# dataset.json needs to be changed to the JSON you generated via YAML
with io.open("dataset.json") as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("path/to/directory")
Example #31
import io
import json
from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

nlu_engine = SnipsNLUEngine(config=CONFIG_EN)


def train():
    with io.open('dataset.json') as f:
        dataset = json.load(f)
    print("training.......")
    nlu_engine.fit(dataset)
    nlu_engine.persist("demo")


train()
parsing = nlu_engine.parse("Hey, lights on in the lounge !")
print("parsing...")
print(json.dumps(parsing, indent=2))

# Use CLI to generate dataset, snips-nlu generate-dataset en dataset.yaml > dataset.json
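As an alternative to the CLI command mentioned in the comment above, the dataset can also be generated directly in Python with Dataset.from_yaml_files (the same helper used in Example #11). A minimal sketch, assuming dataset.yaml is the YAML file from the comment:

import io
import json

from snips_nlu.dataset import Dataset

# Build the dataset from the YAML definition and write it out as JSON
yaml_dataset = Dataset.from_yaml_files("en", ["dataset.yaml"])
with io.open("dataset.json", "w", encoding="utf8") as f:
    f.write(json.dumps(yaml_dataset.json, indent=2))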
Example #32
def init_snipsnlu():
    engine = SnipsNLUEngine(config=CONFIG_EN)
    with io.open("proj_new.json") as f:
        dataset = json.load(f)
        engine.fit(dataset)
    return engine
Example #33
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.6

session = tf.Session(config=config)

keras.backend.set_session(session)

# Load dataset for intent detection
try:
    with io.open(str(Path('app', 'static', 'samples', 'dataset.json'))) as fr:
        dataset = json.load(fr)
except Exception as e:
    print("Could not load dataset {}".format(str(e)))

# Train dataset
nlu_engine = SnipsNLUEngine(resources=load_resources("snips_nlu_en"))
nlu_engine.fit(dataset)

# Load class names
print("[INFO]: Loading Classes")
classNames = np.load(
    "/NeMo/examples/applications/asr_service/app/emotion-classification/new_classes.npy"
)

# Load tokenizer pickle file
print("[INFO]: Loading Tokens")
with open(
        '/NeMo/examples/applications/asr_service/app/emotion-classification/new_tokenizer.pickle',
        'rb') as handle:
    Tokenizer = pickle.load(handle)
"""
Example #34
import io
import json
import sys

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN


with open('/Users/ayaali/Documents/Geni_chatbot/Genei/question_new.txt') as fl:
    mylist = [line.rstrip('\n') for line in fl]
    
    questions=["Has DeNovoSeq a job tracking system? ","What is functional annotation? ", "Which assembler should I use if I manage prokaryotic data?",
               "What is the FTP browser of DeNovoSeq?"];

default_engine = SnipsNLUEngine()
#%%

engine = SnipsNLUEngine(config=CONFIG_EN)
with io.open("/Users/ayaali/Documents/Geni_chatbot/Genei/dataset.json") as fil:
    dataset = json.load(fil)

#%%
res = []
with open('/Users/ayaali/Documents/Geni_chatbot/Genei/out_new.txt', 'w') as f:
    engine.fit(dataset)
    seed = 42
    engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed)
    engine.fit(dataset)
    for ques in mylist:
        parsing = engine.parse(ques)
Example #35
from __future__ import unicode_literals
from snips_nlu import SnipsNLUEngine, load_resources
from SPARQLWrapper import SPARQLWrapper, JSON
import sys
import io
import json
import snips_nlu

resultado = []
snips_nlu.load_resources("es")
reload(sys)
sys.setdefaultencoding('utf8')
# read the training file used to identify the intent
with io.open("dataset.json") as f:
    engine_dict = json.load(f)
engine = SnipsNLUEngine()
engine.fit(engine_dict)


# method to obtain the details of the question asked
def pregunta(frase):
    r = engine.parse(unicode(frase))
    return json.dumps(r, indent=2)


# connection to VIRTUOSO
sparql = SPARQLWrapper("http://localhost:8890/sparql/plantas1")


# methods for querying VIRTUOSO
def tipos(entidad):
Example #36
import io
import json
import sys

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN
from SimpleWebSocketServer import SimpleWebSocketServer, WebSocket
import threading

seed = 42
engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed)

server = None
clients = []


class SimpleWSServer(WebSocket):
    def handleConnected(self):
        clients.append(self)

    def handleClose(self):
        clients.remove(self)


def run_server():
    global server
    global stop_thread
    server = SimpleWebSocketServer('',
                                   9000,
                                   SimpleWSServer,
                                   selectInterval=(1000.0 / 15) / 1000)
Example #37
def train_eval_snips_nlu_model(lang='en', cross=False, save=''):
    """ Train snips data from all brat annotation object 

    :param lang: abbreviate language name 
    :param save: path where model will be save
    :return: None
    :rtype: None
    """
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN
    from snips_nlu_metrics import compute_train_test_metrics, compute_cross_val_metrics
    import pickle
    import json

    if cross:
        train_data_obj = BuildSnipsDataTask2(lang,
                                             cross=cross,
                                             vers=save,
                                             add_entities=True)
        train_data = train_data_obj.build_snips_data_task2()
        print("--> Evaluating training data with Snips metrics...")
        filename_results = source_result / "snips_semeval_2020_evaluation_task2_{}.pkl".format(
            save)
        if not Path(filename_results).exists():
            tt_metrics = compute_train_test_metrics(
                train_dataset=train_data[0],
                test_dataset=train_data[1],
                engine_class=SnipsNLUEngine,
                include_slot_metrics=True)
            #print(tt_metrics)
            if not Path(filename_results).exists():
                with codecs.open(filename_results, 'wb') as metric:
                    pickle.dump(tt_metrics, metric)
                from datetime import datetime
                dmtime = "_{}_{}".format(
                    save,
                    datetime.now().strftime("%Y%m%d-%H%M%S"))
                name = "snips_semeval_2020_evaluation_task2{}.json".format(
                    dmtime)
                filename_results_json = source_result / name
                with codecs.open(filename_results_json, 'w',
                                 "utf-8") as m_json:
                    json.dump(tt_metrics, m_json)

    else:
        filename_results = source_result / "snips_semeval_2020_model_task2_{}".format(
            save)
        train_data_obj = BuildSnipsDataTask2(lang,
                                             cross=cross,
                                             vers=save,
                                             add_entities=True)
        train_data = train_data_obj.build_snips_data_task2()
        #print(CONFIG_EN)
        nlu_engine = SnipsNLUEngine(CONFIG_EN)
        print("--> Training patent data with Snips...")
        nlu_engine.fit(train_data)
        """
        try:     
            print("--> Saving model trained with Snips (JOBLIB)...")
            filename_joblib = source_result / "snips_semeval_2020_model_task2_{}.pkl".format(save)            
            with codecs.open(filename_joblib, 'wb') as metric:
                pickle.dump(nlu_engine, metric)
        except: pass
        """
        print("--> Saving model trained with Snips (SNIPS)...")
        try:
            nlu_engine.persist(filename_results)
        except Exception as e:
            print("error saving the madel....{}".format(str(e)))