Example #1
def create_nlp_model() -> SnipsNLUEngine:
    """
    This function trains a new ML model from the given dataset. It then saves the model in the root directory of the project with the file name: nlpumodel
    This function will only be called once, at the start of the program, if nlumodel file is not detected in the current directory
    Parameters required: None
    Return data: Trained SnipsNLUEngine object
    """
    # Creating a barebones engine
    engine = SnipsNLUEngine(config=CONFIG_EN)

    # Creating dataset from yaml files present in nlputrain directory
    data = dataset.Dataset.from_yaml_files(
        "en", ["./nlputrain/" + i for i in os.listdir("./nlputrain/") if ".yaml" in i]
    )

    # Training the engine with given dataset
    engine.fit(data)

    # Persisting the engine so it can be reused later
    # The persisted engine is saved in the nlumodel folder
    try:
        engine.persist("nlumodel")
    except PersistingError:
        print("Old NLP file still exists. Deleting..")
        # Removing old model files using shutil
        shutil.rmtree("nlumodel")
        engine.persist("nlumodel")

    print("NLP model has been created and saved in directory: nlumodel")
    # Returning trained engine
    return engine
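
The docstring above implies a load-or-create pattern at startup. A minimal sketch of such a caller, assuming the same "nlumodel" directory and that create_nlp_model is importable:

import os
from snips_nlu import SnipsNLUEngine

# Load the persisted engine if it exists, otherwise train a new one
# (mirrors the behaviour described in create_nlp_model's docstring).
if os.path.isdir("nlumodel"):
    engine = SnipsNLUEngine.from_path("nlumodel")
else:
    engine = create_nlp_model()
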
Example #2
class SnipsService(ApiService):
    def __init__(self,
                 classes,
                 model_path=None,
                 max_api_calls=None,
                 verbose=False):
        super().__init__(classes, max_api_calls, verbose)
        load_resources('en')
        if model_path:
            self.load_model(model_path)
        else:
            self.engine = SnipsNLUEngine(config=CONFIG_EN)

    def train_model(self, dataset):
        self.engine.fit(dataset)

    def train_model_from_file(self, dataset_path):
        with io.open(dataset_path) as f:
            self.train_model(json.load(f))

    def save_model(self, model_path):
        self.engine.persist(model_path)

    def load_model(self, model_path):
        self.engine = SnipsNLUEngine.from_path(model_path)

    def predict(self, utterance):
        result = self.engine.parse(utterance)
        try:
            return result['intent']['intentName']
        except Exception as e:
            print('ERR:', e)
            print('Failed to parse: "{}"'.format(utterance))
            print(result)
            return None
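
A hypothetical usage of SnipsService; the intent class names, dataset path and model path are illustrative, and the ApiService constructor behaviour is assumed from the snippet:

# Illustrative only: class names and paths are assumptions, not part of the original code.
service = SnipsService(classes=["MakeCoffee", "MakeTea"])
service.train_model_from_file("dataset.json")
service.save_model("snips_model")
print(service.predict("make me a cup of coffee"))
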
Example #3
def save_engine(engine: SnipsNLUEngine, path: str) -> None:
    """Save trained snips nlu engine in a tar.gz archive."""
    with tarfile.open(path, "w:gz") as archive:
        with TemporaryDirectory() as tmp:
            path = os.path.join(tmp, "engine")

            logger.debug("Saving engine to path %s", path)
            engine.persist(path)
            archive.add(path, arcname="engine")
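
A plausible counterpart for loading the archived engine back, using the same "engine" arcname as above (this helper is not part of the original snippet):

import os
import tarfile
from tempfile import TemporaryDirectory

from snips_nlu import SnipsNLUEngine


def load_engine(path: str) -> SnipsNLUEngine:
    """Load a snips nlu engine from a tar.gz archive written by save_engine."""
    with tarfile.open(path, "r:gz") as archive:
        with TemporaryDirectory() as tmp:
            # Extract the "engine" directory and load it back into memory
            archive.extractall(tmp)
            return SnipsNLUEngine.from_path(os.path.join(tmp, "engine"))
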
def train_eval_snips_nlu_model(lang='en', cross=False, save=''):
    """ Train snips data from all brat annotation object 

    :param lang: abbreviate language name 
    :param save: path where model will be save
    :return: None
    :rtype: None
    """
    from snips_nlu import SnipsNLUEngine
    from snips_nlu.default_configs import CONFIG_EN
    from snips_nlu_metrics import compute_train_test_metrics, compute_cross_val_metrics
    import pickle
    import json

    if cross:
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_snips_data_task1()
        print("--> Evaluating training data with Snips metrics...")
        filename_results = source_result / "snips_semeval_2020_evaluation_task1_{}.pkl".format(save)
        if not Path(filename_results).exists():
            tt_metrics = compute_train_test_metrics(train_dataset=train_data[0],
                                                    test_dataset=train_data[1],
                                                    engine_class=SnipsNLUEngine,
                                                    include_slot_metrics=False)
            print("--> Writing snips nlu metrics data to file...")
            with codecs.open(filename_results, 'wb') as metric:
                pickle.dump(tt_metrics, metric)
            from datetime import datetime
            dmtime = "_{}_{}".format(save, datetime.now().strftime("%Y%m%d-%H%M%S"))
            name = "snips_semeval_2020_evaluation_task1{}.json".format(dmtime)
            filename_results_json = source_result / name
            with codecs.open(filename_results_json, 'w', "utf-8") as m_json:
                json.dump(tt_metrics, m_json)

    else:
        filename_results = source_result / "snips_semeval_2020_model_task1_{}".format(save)
        train_data_obj = BuildSnipsDataTask1(lang, cross=cross, vers=save)
        train_data = train_data_obj.build_snips_data_task1()
        nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
        print("--> Training patent data with Snips...")
        nlu_engine.fit(train_data)
        try:
            print("--> Saving model trained with Snips (JOBLIB)...")
            filename_joblib = source_result / "snips_semeval_2020_model_task1_{}.pkl".format(save)
            with codecs.open(filename_joblib, 'wb') as metric:
                pickle.dump(nlu_engine, metric)
        except Exception as e:
            print("Could not pickle the trained engine:", e)
        print("--> Saving model trained with Snips (SNIPS)...")
        try:
            nlu_engine.persist(filename_results)
        except Exception as e:
            print("Could not persist the trained engine:", e)
Example #5
    def test_parse(self):
        # Given / When
        dataset_stream = io.StringIO(u"""
---
type: intent
name: MakeTea
utterances:
  - make me a [beverage_temperature:Temperature](hot) cup of tea
  - make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
  - brew [number_of_cups:snips/number](one) cup of coffee please
  - make me [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        nlu_engine = SnipsNLUEngine().fit(dataset)
        nlu_engine.persist(self.tmp_file_path)

        # When / Then
        output_target = io.StringIO()
        with self.fail_if_exception("Failed to parse using CLI script"):
            with redirect_stdout(output_target):
                parse(str(self.tmp_file_path), "Make me two cups of coffee")
        output = output_target.getvalue()

        # Then
        expected_output = """{
  "input": "Make me two cups of coffee",
  "intent": {
    "intentName": "MakeCoffee",
    "probability": 1.0
  },
  "slots": [
    {
      "entity": "snips/number",
      "range": {
        "end": 11,
        "start": 8
      },
      "rawValue": "two",
      "slotName": "number_of_cups",
      "value": {
        "kind": "Number",
        "value": 2.0
      }
    }
  ]
}
"""
        self.assertEqual(expected_output, output)
Example #6
def train(dataset_file_path, train_directory):
    with io.open(dataset_file_path) as f:
        dataset = json.load(f)

    language = dataset.get("language", None)
    config = language_configs.get(language, None)
    if config is None:
        raise Exception(
            f"No language configuration for language {language}")

    nlu_engine = SnipsNLUEngine(config=config)
    nlu_engine.fit(dataset)
    nlu_engine.persist(train_directory)
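
The function above relies on a module-level language_configs mapping that is not shown in the snippet; a plausible definition (the set of languages is illustrative):

from snips_nlu.default_configs import CONFIG_DE, CONFIG_EN, CONFIG_FR

# Maps dataset language codes to Snips default configurations (assumed shape).
language_configs = {
    "en": CONFIG_EN,
    "de": CONFIG_DE,
    "fr": CONFIG_FR,
}
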
Example #7
def main():
    """
    This builds a training dataset, trains an NLU engine with it, and saves that engine. This must be done any time a
    new command is added or utterances are edited for a command. If an NLU engine already exists, it is deleted.
    """
    training_json = json.loads(build_training_dataset())
    engine_path = os.path.join('Voithos', 'utilities', 'NLU')

    nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
    nlu_engine = nlu_engine.fit(training_json)

    try:
        nlu_engine.persist(engine_path)
    except PersistingError:
        shutil.rmtree(engine_path)
        nlu_engine.persist(engine_path)
    def test_engine_with_keyword_slot_filler_should_be_serializable(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: SetLightColor
utterances:
- set the light to [color](blue) in the [room](kitchen)
- please make the lights [color](red) in the [room](bathroom)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        intent = "SetLightColor"
        slot_filler_config = {
            "unit_name": "keyword_slot_filler",
            "lowercase": True
        }
        parser_config = ProbabilisticIntentParserConfig(
            slot_filler_config=slot_filler_config)
        engine_config = NLUEngineConfig([parser_config])
        engine = SnipsNLUEngine(engine_config).fit(dataset, intent)
        engine.persist(self.tmp_file_path)
        text = "I want Red lights in the kitchen now"

        # When
        loaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
        res = loaded_engine.parse(text)

        # Then
        expected_slots = [
            custom_slot(
                unresolved_slot(match_range={
                    START: 7,
                    END: 10
                },
                                value="Red",
                                entity="color",
                                slot_name="color"), "red"),
            custom_slot(
                unresolved_slot(match_range={
                    START: 25,
                    END: 32
                },
                                value="kitchen",
                                entity="room",
                                slot_name="room"))
        ]
        self.assertListEqual(expected_slots, res["slots"])
Example #9
def train(dataset_path, output_path, config_path):
    """Train an NLU engine on the provided dataset"""
    with Path(dataset_path).open("r", encoding="utf8") as f:
        dataset = json.load(f)

    config = None
    if config_path is not None:
        with Path(config_path).open("r", encoding="utf8") as f:
            config = json.load(f)

    load_resources(dataset["language"])
    print("Create and train the engine...")
    engine = SnipsNLUEngine(config).fit(dataset)

    print("Persisting the engine...")
    engine.persist(output_path)

    print("Saved the trained engine to %s" % output_path)
Example #10
def train_nlu():
    with io.open("training_data/dataset.json") as f:
        sample_dataset = json.load(f)

    nlu_engine = SnipsNLUEngine()

    print("Snips training started")
    train_start = datetime.datetime.now()
    nlu_engine = nlu_engine.fit(sample_dataset)

    if os.path.exists(model_path):
        shutil.rmtree(model_path, ignore_errors=True)
    nlu_engine.persist(model_path)

    train_end = datetime.datetime.now()
    print("Total time to train the Snips model: {0}".format(train_end -
                                                            train_start))
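
model_path is referenced but never defined in the snippet; a plausible module-level definition (the path itself is illustrative):

import os

# Assumed location where the trained Snips engine is persisted (illustrative).
model_path = os.path.join("training_data", "snips_engine")
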
Example #11
def train(dataset_path, output_path, config_path, verbose):
    """Train an NLU engine on the provided dataset"""
    if verbose:
        set_nlu_logger(logging.DEBUG)
    with Path(dataset_path).open("r", encoding="utf8") as f:
        dataset = json.load(f)

    config = None
    if config_path is not None:
        with Path(config_path).open("r", encoding="utf8") as f:
            config = json.load(f)

    print("Create and train the engine...")
    engine = SnipsNLUEngine(config).fit(dataset)

    print("Persisting the engine...")
    engine.persist(output_path)

    print("Saved the trained engine to %s" % output_path)
Example #12
def train(dataset_path,
          output_path,
          config_path=None,
          verbose=False,
          random_seed=None):
    """Train an NLU engine on the provided dataset"""
    import json
    import logging
    from pathlib import Path

    from snips_nlu import SnipsNLUEngine
    from snips_nlu.cli.utils import set_nlu_logger
    from snips_nlu.common.utils import check_random_state

    if verbose == 1:
        set_nlu_logger(logging.INFO)
    elif verbose >= 2:
        set_nlu_logger(logging.DEBUG)

    with Path(dataset_path).open("r", encoding="utf8") as f:
        dataset = json.load(f)

    config = None
    if config_path is not None:
        with Path(config_path).open("r", encoding="utf8") as f:
            config = json.load(f)

    random_state = check_random_state(random_seed)

    print("Create and train the engine...")
    engine = SnipsNLUEngine(config, random_state=random_state).fit(dataset)

    print("Persisting the engine...")
    engine.persist(output_path)

    print("Saved the trained engine to %s" % output_path)
Example #13
def init(seed):
    with io.open("Dataset/dataset.json",encoding="utf8") as f:
        sample_dataset = json.load(f)

    print("initialising  Lynda")

    from snips_nlu import SnipsNLUEngine

    from snips_nlu.default_configs import CONFIG_EN

    nlu_engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed)                                                  #training the nlu model


    nlu_engine.fit(sample_dataset)

    print("momdel created")





    nlu_engine.persist('model')                                                                                        # saving the trained model

    print("model dumped")
from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

seed = 42
engine = SnipsNLUEngine(config=CONFIG_EN, random_state=seed)

dataset_name = 'dataset1.json'

get_current_working_directory = os.getcwd()
splitted_current_working_directory = get_current_working_directory.split(
    os.sep)
# print(splitted_current_working_directory)
directory_index = splitted_current_working_directory.index('NLU')
# print(directory_index)
root_directory_path = os.sep.join(
    splitted_current_working_directory[:directory_index + 1])
# print(root_directory_path)
dataset_path = os.path.join(root_directory_path, 'dataset', 'json',
                            dataset_name)

with open(dataset_path, encoding='utf-16', errors='ignore') as f:
    dataset = json.load(f, strict=False)

print("TRAINING THE ENGINE...")

engine.fit(dataset)

shutil.rmtree(os.path.join(root_directory_path, 'trained_model'), ignore_errors=True)
engine.persist(os.path.join(root_directory_path, 'trained_model'))

print("ENGINE TRAINED.")
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

# snips-nlu generate-dataset en ticket.yaml > ticket.json

# The first step is to understand which intent the sentence is about.
# The second step is to extract the parameters, a.k.a. the slots of the sentence.
if __name__ == "__main__":
    engine = SnipsNLUEngine(config=CONFIG_EN, random_state=233)

    with io.open("dataset/ticket.json") as f:
        dataset = json.load(f)

    # Train model.
    engine.fit(dataset)

    # Save model.
    engine.persist("snips.model")

    # # Test
    # parsing = engine.parse("Can I get 2 tickets for the big short?")
    # print(json.dumps(parsing, indent=2))
Example #16
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_DE

engine = SnipsNLUEngine(config=CONFIG_DE)
with io.open("dataset.json") as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("QBo_Model")
Example #17
def train(
    sentences_dict: typing.Dict[str, str],
    language: str,
    slots_dict: typing.Optional[typing.Dict[str, typing.List[str]]] = None,
    engine_path: typing.Optional[typing.Union[str, Path]] = None,
    dataset_path: typing.Optional[typing.Union[str, Path]] = None,
) -> SnipsNLUEngine:
    """Generate Snips YAML dataset from Rhasspy sentences/slots."""
    slots_dict = slots_dict or {}

    _LOGGER.debug("Creating Snips engine for language %s", language)
    engine = SnipsNLUEngine(config=DEFAULT_CONFIGS[language])

    # Parse JSGF sentences
    _LOGGER.debug("Parsing sentences")
    with io.StringIO() as ini_file:
        # Join as single ini file
        for lines in sentences_dict.values():
            print(lines, file=ini_file)
            print("", file=ini_file)

        intents = rhasspynlu.parse_ini(ini_file.getvalue())

    # Split into sentences and rule/slot replacements
    sentences, replacements = rhasspynlu.ini_jsgf.split_rules(intents)

    for intent_sentences in sentences.values():
        for sentence in intent_sentences:
            rhasspynlu.jsgf.walk_expression(sentence,
                                            rhasspynlu.number_range_transform,
                                            replacements)

    # Convert to directed graph *without* expanding slots
    # (e.g., $rhasspy/number)
    _LOGGER.debug("Converting to intent graph")
    intent_graph = rhasspynlu.sentences_to_graph(sentences,
                                                 replacements=replacements,
                                                 expand_slots=False)

    # Get start/end nodes for graph
    start_node, end_node = rhasspynlu.jsgf_graph.get_start_end_nodes(
        intent_graph)
    assert (start_node
            is not None) and (end_node
                              is not None), "Missing start/end node(s)"

    if dataset_path:
        # Use user file
        dataset_file = open(dataset_path, "w+")
    else:
        # Use temporary file
        dataset_file = typing.cast(
            typing.TextIO, tempfile.NamedTemporaryFile(suffix=".yml",
                                                       mode="w+"))
        dataset_path = dataset_file.name

    with dataset_file:
        _LOGGER.debug("Writing YAML dataset to %s", dataset_path)

        # Walk first layer of edges with intents
        for _, intent_node, edge_data in intent_graph.edges(start_node,
                                                            data=True):
            intent_name: str = edge_data["olabel"][9:]

            # New intent
            print("---", file=dataset_file)
            print("type: intent", file=dataset_file)
            print("name:", quote(intent_name), file=dataset_file)
            print("utterances:", file=dataset_file)

            # Get all paths through the graph (utterances)
            used_utterances: typing.Set[str] = set()
            paths = nx.all_simple_paths(intent_graph, intent_node, end_node)
            for path in paths:
                utterance = []
                entity_name = None
                slot_name = None
                slot_value = None

                # Walk utterance edges
                for from_node, to_node in rhasspynlu.utils.pairwise(path):
                    edge_data = intent_graph.edges[(from_node, to_node)]
                    ilabel = edge_data.get("ilabel")
                    olabel = edge_data.get("olabel")
                    if olabel:
                        if olabel.startswith("__begin__"):
                            slot_name = olabel[9:]
                            entity_name = None
                            slot_value = ""
                        elif olabel.startswith("__end__"):
                            if entity_name == "rhasspy/number":
                                # Transform to Snips number
                                entity_name = "snips/number"
                            elif not entity_name:
                                # Collect actual value
                                assert (
                                    slot_name and slot_value
                                ), f"No slot name or value (name={slot_name}, value={slot_value})"

                                entity_name = slot_name
                                slot_values = slots_dict.get(slot_name)
                                if not slot_values:
                                    slot_values = []
                                    slots_dict[slot_name] = slot_values

                                slot_values.append(slot_value.strip())

                            # Reference slot/entity (values will be added later)
                            utterance.append(f"[{slot_name}:{entity_name}]")

                            # Reset current slot/entity
                            entity_name = None
                            slot_name = None
                            slot_value = None
                        elif olabel.startswith("__source__"):
                            # Use Rhasspy slot name as entity
                            entity_name = olabel[10:]

                    if ilabel:
                        # Add to current slot/entity value
                        if slot_name and (not entity_name):
                            slot_value += ilabel + " "
                        else:
                            # Add directly to utterance
                            utterance.append(ilabel)
                    elif (olabel and (not olabel.startswith("__"))
                          and slot_name and (not slot_value)
                          and (not entity_name)):
                        slot_value += olabel + " "

                if utterance:
                    utterance_str = " ".join(utterance)
                    if utterance_str not in used_utterances:
                        # Write utterance
                        print("  -", quote(utterance_str), file=dataset_file)
                        used_utterances.add(utterance_str)

            print("", file=dataset_file)

        # Write entities
        for slot_name, values in slots_dict.items():
            if slot_name.startswith("$"):
                # Remove arguments and $
                slot_name = slot_name.split(",")[0][1:]

            # Skip numbers
            if slot_name in {"rhasspy/number"}:
                # Should have been converted already to snips/number
                continue

            # Keep only unique values
            values_set = set(values)

            print("---", file=dataset_file)
            print("type: entity", file=dataset_file)
            print("name:", quote(slot_name), file=dataset_file)
            print("values:", file=dataset_file)

            slot_graph = rhasspynlu.sentences_to_graph({
                slot_name: [
                    rhasspynlu.jsgf.Sentence.parse(value)
                    for value in values_set
                ]
            })

            start_node, end_node = rhasspynlu.jsgf_graph.get_start_end_nodes(
                slot_graph)
            n_data = slot_graph.nodes(data=True)
            for path in nx.all_simple_paths(slot_graph, start_node, end_node):
                words = []
                for node in path:
                    node_data = n_data[node]
                    word = node_data.get("word")
                    if word:
                        words.append(word)

                if words:
                    print("  -", quote(" ".join(words)), file=dataset_file)

            print("", file=dataset_file)

        # ------------
        # Train engine
        # ------------

        if engine_path:
            # Delete existing engine
            engine_path = Path(engine_path)
            engine_path.parent.mkdir(exist_ok=True)

            if engine_path.is_dir():
                # Snips will fail if the directory exists
                _LOGGER.debug("Removing existing engine at %s", engine_path)
                shutil.rmtree(engine_path)
            elif engine_path.is_file():
                _LOGGER.debug("Removing unexpected file at %s", engine_path)
                engine_path.unlink()

        _LOGGER.debug("Training engine")
        dataset_file.seek(0)
        dataset = Dataset.from_yaml_files(language, [dataset_file])
        engine = engine.fit(dataset)

    if engine_path:
        # Save engine
        engine.persist(engine_path)
        _LOGGER.debug("Engine saved to %s", engine_path)

    return engine
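
A hypothetical call to the function above, assuming Rhasspy's sentences.ini syntax for the values of sentences_dict; the intent name, sentences and engine path are illustrative:

# Train a small engine from a single in-memory ini "file" and query it.
engine = train(
    sentences_dict={
        "sentences.ini": "[GetTime]\nwhat time is it\ntell me the time"
    },
    language="en",
    engine_path="snips_engine",  # any existing engine at this path is removed first
)
print(engine.parse("what time is it"))
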
Example #18
class SnipsNluTrainer:
    """Class to train Snips NLU with training data from Cloudant DB with rollback support."""
    def __init__(self, database_context, cos_context):
        #Cloudant DB
        self.context = database_context
        self.training_data = {}
        self.cos_context = cos_context
        self._check_trainer_dir(ENGINE_PATH_ZIP)

        load_resources("de")
        load_resources("en")
        self.nlu_engine = SnipsNLUEngine()

    def start_training(self):
        self._load_training_data()
        self._train_nlu()
        result = self._persist_nlu()
        return result

    def rollback_nlu(self):
        result = False
        if not ENGINE_PATH_NEW.exists():
            print("No backups exist locally..")
            if not self.cos_context.file_exist_in_bucket(OLD_ENGINE_NAME_ZIP):
                print("There are no backups in bucket..")
                print("Data rollback is not possible!")
            else:
                print("Found saved backups in bucket..")
                self._load_from_bucket(ENGINE_PATH_ZIP, OLD_ENGINE_NAME_ZIP,
                                       ENGINE_PATH_ZIP)
                print("Restored backup from bucket to '{0}'".format(
                    ENGINE_PATH_ZIP))
                self.rollback_nlu()
        else:
            loaded_engine = SnipsNLUEngine.from_path(ENGINE_PATH_NEW)
            self.nlu_engine = loaded_engine
            #Remove new/old local nlu folders. Save backup as new engine
            #shutil.rmtree(ENGINE_PATH_NEW)
            #shutil.rmtree(ENGINE_PATH_OLD)
            result = self._persist_nlu()
            print("Engine rollback was successful")
        return result

    def get_nlu_engine(self):
        if not ENGINE_PATH_NEW.exists():
            print("No engine found locally...")
            print("Searching in bucket...")
            if not self.cos_context.file_exist_in_bucket(NEW_ENGINE_NAME_ZIP):
                print("There are no engine in bucket!")
                print("Engine must be fitted! Please run 'start training'")
                return ""
            else:
                print("Found saved engine in bucket..")
                self._load_from_bucket(ENGINE_PATH_ZIP, NEW_ENGINE_NAME_ZIP,
                                       ENGINE_PATH_ZIP)
                print("Restored saved engine from bucket to '{0}'".format(
                    ENGINE_PATH_ZIP))
                self.get_nlu_engine()
        else:
            loaded_engine = SnipsNLUEngine.from_path(ENGINE_PATH_NEW)
            self.nlu_engine = loaded_engine
            print("Success! Engine was fitted...")
        return self.nlu_engine

    def _load_training_data(self):
        self.training_data = self.context.get_trainings_data()
        if self.training_data == "":
            print("There are no training data!")
        else:
            print("Training data were loaded successfully")

    def _train_nlu(self):
        self.nlu_engine.fit(self.training_data)
        print("Engine was trained successfully")

    def _persist_nlu(self):
        result = False
        # first save engine attempt
        if not (ENGINE_PATH_NEW.exists()):
            self.nlu_engine.persist(ENGINE_PATH_NEW)
            result = self._persist_to_bucket(ENGINE_PATH_NEW, ENGINE_PATH_ZIP,
                                             NEW_ENGINE_NAME_ZIP)
        else:
            #Remove&override old backup
            if ENGINE_PATH_OLD.exists():
                shutil.rmtree(ENGINE_PATH_OLD)
                self.cos_context.remove_file(OLD_ENGINE_NAME_ZIP)
                print("Overrided old engine backup...")
            #save(rename) new engine as old local and in persist
            os.rename(ENGINE_PATH_NEW, ENGINE_PATH_OLD)
            self.cos_context.rename_file(NEW_ENGINE_NAME_ZIP,
                                         OLD_ENGINE_NAME_ZIP)
            #create new new engine
            self.nlu_engine.persist(ENGINE_PATH_NEW)
            result = self._persist_to_bucket(ENGINE_PATH_NEW, ENGINE_PATH_ZIP,
                                             NEW_ENGINE_NAME_ZIP)

        if result:
            print("Engine was saved successfully")
        return result

    #Persist engine as zip to bucket to decrease up/download time (5-6 MB vs 1.5 MB compressed)
    def _compress_engine(self, source, destination):
        base = os.path.basename(destination)
        name = base.split('.')[0]
        archive_format = base.split('.')[1]
        archive_from = os.path.dirname(source)
        archive_to = os.path.basename(source.strip(os.sep))
        print(source, destination, archive_from, archive_to)
        shutil.make_archive(name, archive_format, archive_from, archive_to)
        shutil.move('%s.%s' % (name, archive_format), destination)
        print("Engine was zipped...")

    def _decompress_engine(self, source, destination):
        zip_ref = zipfile.ZipFile(source, 'r')
        zip_ref.extractall(destination)
        print("Engine was unzipped..")

    #Engine folder -> zip -> save to ibm bucket
    def _persist_to_bucket(self, source, destination, file_name):
        # Python3 -> Python2 compatibility, pathlib Path to string
        source = str(source)
        destination = str(destination)
        file_name = str(file_name)
        self._compress_engine(source, destination + "/" + file_name)
        result = self.cos_context.upload_file(destination + "/" + file_name,
                                              file_name)
        return result

    # Download zipped engine -> save -> unzip it
    def _load_from_bucket(self, destination_zip, file_name, to_unzip_path):
        # Python3 -> Python2 compatibility, pathlib Path to string
        destination_zip = str(destination_zip)
        file_name = str(file_name)
        to_unzip_path = str(to_unzip_path)
        result = self.cos_context.download_file(destination_zip, file_name)
        if result:
            self._decompress_engine(destination_zip + "/" + file_name,
                                    to_unzip_path)
        return result

    def _check_trainer_dir(self, path):
        exist = os.path.isdir(path)
        if not exist:
            os.makedirs(path)
            exist = True
            print("Path '{0}' was created!".format(path))
        return exist
Example #19
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN  #For German please use ..._DE

engine = SnipsNLUEngine(config=CONFIG_EN)

# dataset.json needs to be changed to the JSON you generated via YAML
with io.open("dataset.json") as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("path/to/directory")
Example #20
class SnipsInterpreter(Interpreter):
    """Wraps the snips-nlu stuff to provide valuable informations to an agent.
    """
    def __init__(self,
                 lang: str,
                 cache_directory: str = None,
                 trainings_store: TrainingsStore = None) -> None:
        """Instantiates a new Snips interpreter.

        Args:
          lang (str): Language used for this interpreter (e.g. en, fr, ...)
          cache_directory (str): Path where training and trained files are placed
          trainings_store (TrainingsStore): Optional trainings store used when fitting the engine

        """
        super(SnipsInterpreter, self).__init__('snips', lang, cache_directory,
                                               trainings_store)

        self._engine = None
        self._slot_mappings = {}
        self._entities = {}

    def _configure(self) -> None:
        self._slot_mappings = self._engine.dataset_metadata.get(
            'slot_name_mappings', {})
        self._entities = self._engine.dataset_metadata.get(ENTITIES, {})

        self.intents = list(self._slot_mappings.keys())

    def load_from_cache(self) -> None:
        self._logger.info('Loading engine from "%s"', self.cache_directory)
        self._engine = SnipsNLUEngine.from_path(self.cache_directory)
        self._configure()

    def _check_and_install_resources_package(self) -> str:
        resource_pkg_name = f'snips_nlu_{self.lang}'

        if not importlib.util.find_spec(resource_pkg_name):
            self._logger.info(
                'Could not import resource package "%s", attempting installation',
                resource_pkg_name)

            try:
                subprocess.run(
                    [sys.executable, '-m', 'snips_nlu', 'download', self.lang],
                    check=True,
                    stdout=subprocess.PIPE)
                self._logger.info('Successfully downloaded "%s"!',
                                  resource_pkg_name)
                # Reload resources (used by snips to determine if resources are installed)
                importlib.reload(pkg_resources)
            except:  # pragma: no cover pylint: disable=W0702
                self._logger.warning(
                    'Looks like it failed, you may have to do it manually with: '\
                    '"python -m snips_nlu download %s"',
                    self.lang)

        return resource_pkg_name

    def fit(self, data: dict) -> None:
        super().fit(data)

        data_lang = data.get('language')

        if data_lang != self.lang:  # pragma: no cover
            self._logger.warning(
                'Training language "%s" and interpreter language "%s" do not match, '\
                'things could go badly',
                data_lang, self.lang)

        self._logger.info('Fitting using "snips v%s"', __version__)

        checksum = compute_checksum(data)
        cached_checksum = None

        # Try to load the used checksum
        if self.cache_directory:
            cached_checksum_path = os.path.join(self.cache_directory,
                                                'trained.checksum')
            cached_checksum = read_file(cached_checksum_path,
                                        ignore_errors=True)

        if not cached_checksum:
            self._logger.debug('Checksum file not found')

        if checksum == cached_checksum:
            self.load_from_cache()
        else:
            config = None

            try:
                self._logger.info(
                    'Importing default configuration for language "%s"',
                    self.lang)
                config = getattr(snips_confs, 'CONFIG_%s' % self.lang.upper())
            except AttributeError:
                self._logger.warning(
                    'Could not import default configuration, it will use the generic one instead'
                )

            resource_pkg_name = self._check_and_install_resources_package()

            self._engine = SnipsNLUEngine(
                config, resources=load_resources(resource_pkg_name))
            self._engine.fit(data)

            if self.cache_directory:  # pragma: no cover
                self._logger.info('Persisting trained engine to "%s"',
                                  self.cache_directory)

                # Make sure the cache directory has been cleaned out
                rmtree(self.cache_directory, ignore_errors=True)

                self._engine.persist(self.cache_directory)

                with open(cached_checksum_path, mode='w') as file:
                    file.write(checksum)

            self._configure()

    @property
    def is_ready(self) -> bool:
        """Returns true if the interpreter is ready.

        Returns:
          bool: Ready or not

        """
        return bool(self._engine and self._engine.fitted)

    def parse(self, msg: str, scopes: List[str] = None) -> List[Intent]:
        if not self.is_ready:
            return []

        # TODO manage multiple intents in the same sentence

        parsed = self._engine.parse(msg, intents=scopes)

        if not parsed[RES_INTENT][RES_INTENT_NAME]:
            return []

        slots = {}

        for slot in parsed[RES_SLOTS]:
            name = slot[RES_SLOT_NAME]
            parsed_slot = slot[RES_VALUE]
            value = SlotValue(get_entity_value(parsed_slot), **slot)

            if name in slots:
                slots[name].append(value)
            else:
                slots[name] = [value]

        return [
            Intent(parsed[RES_INTENT][RES_INTENT_NAME], **slots),
        ]

    def parse_slot(self, intent: str, slot: str, msg: str) -> List[SlotValue]:
        if not self.is_ready:
            return []

        # Here I still use my own method to parse slots because it gives better
        # results in my benchmarks.
        #
        # However, we should keep an eye on https://github.com/snipsco/snips-nlu/pull/724
        # for when it becomes relevant. For now get_slots returns fewer results than this
        # homemade method below.

        entity_label = self._slot_mappings.get(intent, {}).get(slot)

        # No label, just returns the given value
        if not entity_label:
            return [SlotValue(msg)]

        result = []

        # If it's a builtin entity, try to parse it
        if is_builtin_entity(entity_label):
            parsed = self._engine.builtin_entity_parser.parse(
                msg, [entity_label])

            for slot_data in parsed:
                # Here we move some keys to keep the returned meta consistent with the parse above
                # We are checking if `rawValue` is already present because snips-nlu seems to keep
                # a cache so to avoid mutating the same dict twice, we check again this added key.

                if RES_RAW_VALUE not in slot_data:
                    slot_data[RES_RAW_VALUE] = slot_data[RES_VALUE]
                    slot_data[RES_VALUE] = slot_data[RESOLVED_VALUE]
                    slot_data[ENTITY] = slot_data[ENTITY_KIND]

                result.append(
                    SlotValue(get_entity_value(slot_data[RES_VALUE]),
                              **slot_data))
        else:
            parsed = self._engine.custom_entity_parser.parse(
                msg, [entity_label])

            # The custom parser did not find a match and the entity is extensible? Just return the value
            if not parsed and self._entities.get(entity_label,
                                                 {})[AUTOMATICALLY_EXTENSIBLE]:
                return [SlotValue(msg)]

            for slot_data in parsed:
                if RES_RAW_VALUE not in slot_data:
                    slot_data[RES_RAW_VALUE] = slot_data[RES_VALUE]
                    slot_data[RES_VALUE] = {
                        'kind': 'Custom',
                        RES_VALUE: slot_data[RESOLVED_VALUE],
                    }
                    slot_data[ENTITY] = slot_data[ENTITY_KIND]

                result.append(
                    SlotValue(get_entity_value(slot_data[RES_VALUE]),
                              **slot_data))

        return result
Example #21
import io
import json
from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

# snips-nlu generate-dataset en dataset.yaml > dataset.json

engine = SnipsNLUEngine(config=CONFIG_EN)
with io.open('dataset.json') as f:
    dataset = json.load(f)
engine.fit(dataset)
engine.persist("model")
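
After persisting, the trained model can be reloaded without retraining; a short sketch (the utterance is illustrative and depends on what dataset.yaml defines):

import json

from snips_nlu import SnipsNLUEngine

# Reload the engine persisted above and parse a sample utterance.
loaded_engine = SnipsNLUEngine.from_path("model")
parsing = loaded_engine.parse("turn on the lights in the kitchen")
print(json.dumps(parsing, indent=2))
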
Example #22
    if (lang == "fr"):
        config = CONFIG_FR

    if (lang == "it"):
        config = CONFIG_IT

    if (lang == "ja"):
        config = CONFIG_JA

    if (lang == "ko"):
        config = CONFIG_KO

    add_entity(lang)
    os.system("python3 -m snips_nlu download " + lang)
    os.system("python3 -m snips_nlu generate-dataset " + lang + " " + lang +
              ".definition.yaml > definition.json")

    try:
        shutil.rmtree(lang)
    except Exception as e:
        print(e)

    DATASET_PATH = Path(__file__).parent / "definition.json"
    with DATASET_PATH.open(encoding="utf8") as f:
        sample_dataset = json.load(f)

    nlu_engine = SnipsNLUEngine(config=config)
    nlu_engine.fit(sample_dataset)
    nlu_engine.persist(lang)
Example #23
class SnipsInterpreter(Interpreter):
    """Wraps the snips-nlu stuff to provide valuable informations to an agent.
  """
    def __init__(self, lang, cache_directory=None):
        """Instantiates a new Snips interpreter.

    Args:
      lang (str): Language used for this interpreter (e.g. en, fr, ...)
      cache_directory (str): Path where training and trained files are placed

    """

        super(SnipsInterpreter, self).__init__('snips', lang, cache_directory)

        self._engine = None
        self._slot_mappings = {}
        self._entities = {}

    def _configure(self):
        self._slot_mappings = self._engine.dataset_metadata.get(
            'slot_name_mappings', {})
        self._entities = self._engine.dataset_metadata.get(ENTITIES, {})

        self.intents = list(self._slot_mappings.keys())

    def load_from_cache(self):
        self._logger.info('Loading engine from "%s"' % self.cache_directory)

        self._engine = SnipsNLUEngine.from_path(self.cache_directory)

        self._configure()

    def fit(self, data):
        data_lang = data.get('language')

        if data_lang != self.lang:
            self._logger.warning(
                'Training language "%s" and interpreter language "%s" do not match, things could go badly'
                % (data_lang, self.lang))

        self._logger.info('Fitting using "snips v%s"' % __version__)

        checksum = compute_checksum(data)
        cached_checksum = None

        # Try to load the used checksum
        if self.cache_directory:
            cached_checksum_path = os.path.join(self.cache_directory,
                                                'trained.checksum')
            cached_checksum = read_file(cached_checksum_path,
                                        ignore_errors=True)

        if not cached_checksum:
            self._logger.debug('Checksum file not found')

        if checksum == cached_checksum:
            self.load_from_cache()
        else:
            config = None

            try:
                self._logger.info(
                    'Importing default configuration for language "%s"' %
                    self.lang)
                config = getattr(snips_confs, 'CONFIG_%s' % self.lang.upper())
            except AttributeError:
                self._logger.warning(
                    'Could not import default configuration, it will use the generic one instead'
                )

            self._engine = SnipsNLUEngine(config,
                                          resources=load_resources(
                                              'snips_nlu_%s' % self.lang))
            self._engine.fit(data)

            if self.cache_directory:
                self._logger.info('Persisting trained engine to "%s"' %
                                  self.cache_directory)

                rmtree(self.cache_directory, ignore_errors=True)

                self._engine.persist(self.cache_directory)

                with open(cached_checksum_path, mode='w') as f:
                    f.write(checksum)

            self._configure()

    @property
    def is_ready(self):
        """Returns true if the interpreter is ready.

    Returns:
      bool: Ready or not

    """

        return self._engine and self._engine.fitted

    def parse(self, msg, scopes=None):
        if not self.is_ready:
            return []

        # TODO manage multiple intents in the same sentence

        parsed = self._engine.parse(msg, intents=scopes)

        if parsed[RES_INTENT][RES_INTENT_NAME] is None:
            return []

        slots = {}

        for slot in parsed[RES_SLOTS]:
            name = slot[RES_SLOT_NAME]
            parsed_slot = slot[RES_VALUE]
            value = SlotValue(get_entity_value(parsed_slot), **slot)

            if name in slots:
                slots[name].append(value)
            else:
                slots[name] = [value]

        return [
            Intent(parsed[RES_INTENT][RES_INTENT_NAME], **slots),
        ]

    def parse_slot(self, intent, slot, msg):
        if not self.is_ready:
            return []

        # Here I still use my own method to parse slots because it gives better
        # results in my benchmarks.
        #
        # However, we should keep an eye on https://github.com/snipsco/snips-nlu/pull/724
        # for when it becomes relevant. For now get_slots returns fewer results than this
        # homemade method below.

        entity_label = self._slot_mappings.get(intent, {}).get(slot)

        # No label, just returns the given value
        if not entity_label:
            return [SlotValue(msg)]

        result = []

        # If it's a builtin entity, try to parse it
        if is_builtin_entity(entity_label):
            parsed = self._engine.builtin_entity_parser.parse(
                msg, [entity_label])

            for slot_data in parsed:
                # Here we move some keys to keep the returned meta consistent with the parse above
                # We are checking if `rawValue` is already present because snips-nlu seems to keep
                # a cache so to avoid mutating the same dict twice, we check again this added key.

                if RES_RAW_VALUE not in slot_data:
                    slot_data[RES_RAW_VALUE] = slot_data[RES_VALUE]
                    slot_data[RES_VALUE] = slot_data[RESOLVED_VALUE]
                    slot_data[ENTITY] = slot_data[ENTITY_KIND]

                result.append(
                    SlotValue(get_entity_value(slot_data[RES_VALUE]),
                              **slot_data))
        else:
            parsed = self._engine.custom_entity_parser.parse(
                msg, [entity_label])

            # The custom parser did not find a match and the entity is extensible? Just return the value
            if not parsed and self._entities.get(
                    entity_label, {})[AUTOMATICALLY_EXTENSIBLE]:
                return [SlotValue(msg)]

            for slot_data in parsed:
                if RES_RAW_VALUE not in slot_data:
                    slot_data[RES_RAW_VALUE] = slot_data[RES_VALUE]
                    slot_data[RES_VALUE] = {
                        'kind': 'Custom',
                        RES_VALUE: slot_data[RESOLVED_VALUE],
                    }
                    slot_data[ENTITY] = slot_data[ENTITY_KIND]

                result.append(
                    SlotValue(get_entity_value(slot_data[RES_VALUE]),
                              **slot_data))

        return result
Example #24
import io
import json
from snips_nlu import SnipsNLUEngine

with io.open("dataset.json") as f:
    dataset = json.load(f)

engine = SnipsNLUEngine()

engine.fit(dataset)

engine.persist("../model")
Example #25
# @author: yanni

import io
import json

from snips_nlu import SnipsNLUEngine

path = '/Users/yanni/PycharmProjects/chatbot/src/'
### train Slots Detection Model

#!snips-nlu generate-dataset en {path}/Movie_intent.yaml {path}/Movie_entity.yaml  > {path}/Movie_dataset.json
with io.open(path + 'Movie_dataset.json') as f:
    sample_dataset = json.load(f)

nlu_engine = SnipsNLUEngine()

nlu_engine.fit(sample_dataset)

nlu_engine.persist(path + 'Movie_Slots_Detection')

#!snips-nlu generate-dataset en {path}/Aspect_intent.yaml {path}/Aspect_entity.yaml  > {path}/Aspect_dataset.json
with io.open(path + 'Aspect_dataset.json') as f:
    aspect_dataset = json.load(f)

nlu_engine = SnipsNLUEngine()

nlu_engine.fit(aspect_dataset)

nlu_engine.persist(path + 'Aspect_Slots_Detection')
Example #26
"""
This file is responsible for training and saving the SnipsNLU Engine
"""

import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

engine = SnipsNLUEngine(config=CONFIG_EN)

with open("dataset.json") as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("persisted_engine")
Example #27
from __future__ import unicode_literals, print_function

import io
import json

from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.default_configs import CONFIG_EN, CONFIG_DE

with io.open("lights_dataset_de.json") as f:
    sample_dataset = json.load(f)

# load_resources("en")
# nlu_engine = SnipsNLUEngine(config=CONFIG_EN)
load_resources("de")
nlu_engine = SnipsNLUEngine(config=CONFIG_DE)
nlu_engine.fit(sample_dataset)
nlu_engine.persist("models/current")

Example #28
import io
import json

from snips_nlu import SnipsNLUEngine
from snips_nlu.default_configs import CONFIG_EN

engine = SnipsNLUEngine(config=CONFIG_EN)

with io.open("chess_speech.json") as f:
    dataset = json.load(f)

engine.fit(dataset)
engine.persist("./chess_engine")