def test_nlu_engine_should_train_and_parse_in_all_languages(self):
        """Fitting the engine and parsing a query should succeed for every
        supported language."""
        # Given: a small two-intent dataset defined inline in YAML
        yaml_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
- i want [number_of_cups] cups of [beverage_temperature](boiling hot) tea pls
- can you prepare [number_of_cups] cup of [beverage_temperature](cold) tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
        dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
        input_text = "please brew me a cup of coffee"
        for lang in get_all_languages():
            # Reuse the same dataset across languages, only switching the
            # language attribute
            dataset[LANGUAGE] = lang
            nlu_engine = SnipsNLUEngine()

            # When / Then
            with self.fail_if_exception("Could not fit engine in '%s'" % lang):
                nlu_engine = nlu_engine.fit(dataset)

            with self.fail_if_exception("Could not parse in '%s'" % lang):
                parsing = nlu_engine.parse(input_text)
            self.assertEqual("MakeCoffee", parsing[RES_INTENT][RES_INTENT_NAME])
Example #2
0
 def test_space_should_by_ignored(self):
     """A whitespace-only input should yield no tokens in any supported
     language."""
     # Given
     whitespace = " "
     for language in get_all_languages():
         # When / Then
         self.assertEqual(len(tokenize(whitespace, language)), 0)
Example #3
0
    def test_should_parse_in_all_languages(self):
        """Building a builtin entity parser and parsing a numeric string
        should not raise for any supported language."""
        # Given
        number_text = "1234"

        # When / Then
        for lang in get_all_languages():
            entity_parser = BuiltinEntityParser.build(lang)
            entity_parser.parse(number_text)
Example #4
0
    def test_should_support_all_languages(self):
        """The builtin entity parser should handle every supported language
        without raising, even on empty input."""
        # Given
        empty_text = ""

        for lang in get_all_languages():
            entity_parser = BuiltinEntityParser.build(language=lang)
            failure_msg = "get_builtin_entities does not support %s." % lang
            with self.fail_if_exception(failure_msg):
                # When / Then
                entity_parser.parse(empty_text)
def validate_and_format_dataset(dataset):
    """Checks that the dataset is valid and format it

    The dataset is deep-copied, its intents and entities are validated and
    sorted by name, and the ``VALIDATED`` flag is set so that subsequent
    calls return immediately.

    Args:
        dataset (dict or Dataset): dataset to validate and format

    Returns:
        dict: the validated and formatted dataset

    Raise:
        DatasetFormatError: When the dataset format is wrong
    """
    from snips_nlu_parsers import get_all_languages

    if isinstance(dataset, Dataset):
        dataset = dataset.json

    # Make this function idempotent
    if dataset.get(VALIDATED, False):
        return dataset
    # The JSON round-trip both deep-copies the input and guarantees it only
    # contains JSON-serializable data, so a separate deepcopy is redundant
    dataset = json.loads(json.dumps(dataset))
    validate_type(dataset, dict, object_label="dataset")
    mandatory_keys = [INTENTS, ENTITIES, LANGUAGE]
    for key in mandatory_keys:
        validate_key(dataset, key, object_label="dataset")
    validate_type(dataset[ENTITIES], dict, object_label="entities")
    validate_type(dataset[INTENTS], dict, object_label="intents")
    language = dataset[LANGUAGE]
    validate_type(language, str, object_label="language")
    if language not in get_all_languages():
        raise DatasetFormatError("Unknown language: '%s'" % language)

    # Sort intents by name so the formatted output is deterministic
    dataset[INTENTS] = {
        intent_name: intent_data
        for intent_name, intent_data in sorted(iteritems(dataset[INTENTS]))}
    for intent in itervalues(dataset[INTENTS]):
        _validate_and_format_intent(intent, dataset[ENTITIES])

    utterance_entities_values = extract_utterance_entities(dataset)
    builtin_entity_parser = BuiltinEntityParser.build(dataset=dataset)

    # Sort entities by name as well (the comprehension previously reused
    # the misleading names intent_name/intent_data here)
    dataset[ENTITIES] = {
        entity_name: entity_data
        for entity_name, entity_data in sorted(iteritems(dataset[ENTITIES]))}

    for entity_name, entity in iteritems(dataset[ENTITIES]):
        utterance_entities = utterance_entities_values[entity_name]
        if is_builtin_entity(entity_name):
            dataset[ENTITIES][entity_name] = \
                _validate_and_format_builtin_entity(entity,
                                                    utterance_entities)
        else:
            dataset[ENTITIES][entity_name] = \
                _validate_and_format_custom_entity(
                    entity, utterance_entities, language,
                    builtin_entity_parser)
    dataset[VALIDATED] = True
    return dataset
Example #6
0
    def test_default_configs_should_work(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: TurnLightOn
utterances:
- turn on the lights
- please switch on the light
- switch the light on
- can you turn the light on ?
- I need you to turn on the lights

---
type: intent
name: GetWeather
utterances:
- what is the weather today
- What's the weather in tokyo today?
- Can you tell me the weather please ?
- what is the weather forecast for this weekend""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        for language in get_all_languages():
            # When
            config = DEFAULT_CONFIGS.get(language)
            self.assertIsNotNone(config, "Missing default config for '%s'"
                                 % language)
            dataset[LANGUAGE] = language
            shared = self.get_shared_data(dataset)
            engine = SnipsNLUEngine(config, **shared).fit(dataset)
            result = engine.parse("Please give me the weather in Paris")

            # Then
            self.assertIsNotNone(result[RES_INTENT])
            intent_name = result[RES_INTENT][RES_INTENT_NAME]
            self.assertEqual("GetWeather", intent_name)
Example #7
0
def download_all_languages(*pip_args):
    """Download compatible resources for all supported languages"""
    from snips_nlu_parsers import get_all_languages

    # Fetch resources one language at a time, forwarding any pip arguments
    for lang in get_all_languages():
        download(lang, False, *pip_args)
Example #8
0
def download_all_languages(*pip_args):
    """Download compatible resources for all supported languages"""
    # NOTE(review): unlike the sibling variant, this relies on a
    # module-level get_all_languages import — confirm it exists
    for lang in get_all_languages():
        download(lang, False, *pip_args)