def validate_and_format_dataset(dataset):
    """Check that the dataset is valid and return a formatted copy.

    The function is idempotent: a dataset already flagged with VALIDATED
    is returned as-is. Otherwise the input mapping is left untouched and
    a normalized copy is validated, formatted and returned.

    Args:
        dataset (dict): dataset mapping; must contain the INTENTS,
            ENTITIES and LANGUAGE keys.

    Returns:
        dict: the validated and formatted dataset, flagged with VALIDATED.

    Raises:
        ValueError: if the language is unknown.
    """
    # Make this function idempotent
    if dataset.get(VALIDATED, False):
        return dataset
    # The JSON round-trip already produces a deep copy of the input while
    # normalizing it to plain JSON types, so the explicit deepcopy that
    # used to precede it was redundant work and has been removed.
    dataset = json.loads(json.dumps(dataset))
    validate_type(dataset, dict)
    mandatory_keys = [INTENTS, ENTITIES, LANGUAGE]
    for key in mandatory_keys:
        validate_key(dataset, key, object_label="dataset")
    validate_type(dataset[ENTITIES], dict)
    validate_type(dataset[INTENTS], dict)
    language = dataset[LANGUAGE]
    validate_type(language, str)
    if language not in get_all_languages():
        raise ValueError("Unknown language: '%s'" % language)
    for intent in itervalues(dataset[INTENTS]):
        validate_and_format_intent(intent, dataset[ENTITIES])
    queries_entities_values = extract_queries_entities(dataset)
    for entity_name, entity in iteritems(dataset[ENTITIES]):
        queries_entities = queries_entities_values[entity_name]
        if is_builtin_entity(entity_name):
            dataset[ENTITIES][entity_name] = \
                validate_and_format_builtin_entity(entity, queries_entities)
        else:
            dataset[ENTITIES][entity_name] = validate_and_format_custom_entity(
                entity, queries_entities, language)
    dataset[VALIDATED] = True
    return dataset
def validate_and_format_dataset(dataset):
    """Check that the dataset is valid and return a formatted copy.

    The function is idempotent: a dataset already flagged with VALIDATED
    is returned as-is. Otherwise the input mapping is left untouched and
    a normalized copy is validated, formatted and returned.

    Args:
        dataset (dict): dataset mapping; must contain the INTENTS,
            ENTITIES and LANGUAGE keys.

    Returns:
        dict: the validated and formatted dataset, flagged with VALIDATED.

    Raises:
        ValueError: if the language is unknown.
    """
    # Make this function idempotent
    if dataset.get(VALIDATED, False):
        return dataset
    # The JSON round-trip already produces a deep copy of the input while
    # normalizing it to plain JSON types, so the explicit deepcopy that
    # used to precede it was redundant work and has been removed.
    dataset = json.loads(json.dumps(dataset))
    validate_type(dataset, dict)
    mandatory_keys = [INTENTS, ENTITIES, LANGUAGE]
    for key in mandatory_keys:
        validate_key(dataset, key, object_label="dataset")
    validate_type(dataset[ENTITIES], dict)
    validate_type(dataset[INTENTS], dict)
    language = dataset[LANGUAGE]
    validate_type(language, str)
    if language not in get_all_languages():
        raise ValueError("Unknown language: '%s'" % language)
    for intent in itervalues(dataset[INTENTS]):
        validate_and_format_intent(intent, dataset[ENTITIES])
    queries_entities_values = extract_queries_entities(dataset)
    for entity_name, entity in iteritems(dataset[ENTITIES]):
        queries_entities = queries_entities_values[entity_name]
        if is_builtin_entity(entity_name):
            dataset[ENTITIES][entity_name] = \
                validate_and_format_builtin_entity(entity, queries_entities)
        else:
            dataset[ENTITIES][entity_name] = validate_and_format_custom_entity(
                entity, queries_entities, language)
    dataset[VALIDATED] = True
    return dataset
def test_space_should_by_ignored(self):
    """Tokenizing a whitespace-only string yields no tokens, whatever the
    language."""
    # Given
    text = " "
    # The loop variable was named `l`, an ambiguous single-letter name
    # (PEP 8 / pylint E741); renamed for readability.
    for language in get_all_languages():
        # When
        tokens = tokenize(text, language)
        # Then
        self.assertEqual(len(tokens), 0)
def test_space_should_by_ignored(self):
    """Tokenizing a whitespace-only string yields no tokens, whatever the
    language."""
    # Given
    text = " "
    # The loop variable was named `l`, an ambiguous single-letter name
    # (PEP 8 / pylint E741); renamed for readability.
    for language in get_all_languages():
        # When
        tokens = tokenize(text, language)
        # Then
        self.assertEqual(len(tokens), 0)
def test_resources_index_should_have_all_languages(self):
    """The resource index must reference every supported language."""
    # Given
    index = RESOURCE_INDEX
    # When
    indexed_languages = set(index)
    # Then
    self.assertSetEqual(indexed_languages, get_all_languages())
def test_should_parse_in_all_languages(self):
    """A builtin entity parser can be built and run for every language."""
    # Given
    text = "1234"
    # When / Then
    for language in get_all_languages():
        BuiltinEntityParser(language).parse(text)
def test_get_builtin_entities_should_support_all_languages(self):
    """get_builtin_entities must not raise for any supported language."""
    # Given
    text = ""
    for language in get_all_languages():
        failure_msg = "get_builtin_entities does not support %s." % language
        with self.fail_if_exception(failure_msg):
            # When / Then
            get_builtin_entities(text, language)
def test_get_builtin_entities_should_support_all_languages(self):
    """get_builtin_entities must not raise for any supported language."""
    # Given
    text = ""
    for language in get_all_languages():
        failure_msg = "get_builtin_entities does not support %s." % language
        with self.fail_if_exception(failure_msg):
            # When / Then
            get_builtin_entities(text, language)
def test_all_languages_should_have_stop_words(self):
    """Every supported language must provide stop words.

    The capitalization feature of the CRF assumes all languages have
    stop words.
    """
    # Given
    for language in get_all_languages():
        try:
            # When / Then
            get_stop_words(language)
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit) and the message grammar fixed.
        except Exception:
            self.fail("%s has no stop words" % language)
def test_all_languages_should_have_stop_words(self):
    """Every supported language must provide stop words.

    The capitalization feature of the CRF assumes all languages have
    stop words.
    """
    # Given
    for language in get_all_languages():
        try:
            # When / Then
            get_stop_words(language)
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit) and the message grammar fixed.
        except Exception:
            self.fail("%s has no stop words" % language)
def test_resources_index_should_have_all_languages(self):
    """The resource index must reference every supported language."""
    # Given
    index = RESOURCE_INDEX
    # When
    indexed_languages = set(index)
    # Then
    self.assertSetEqual(indexed_languages, get_all_languages())
def test_should_support_all_languages(self):
    """A parser built for each supported language must parse without
    raising."""
    # Given
    text = ""
    for language in get_all_languages():
        parser = BuiltinEntityParser.build(language=language)
        failure_msg = "get_builtin_entities does not support %s." % language
        with self.fail_if_exception(failure_msg):
            # When / Then
            parser.parse(text)
def test_default_configs_should_work(self):
    """Each language's default config must exist, train, and parse the
    sample weather query to the expected intent."""
    # Given
    dataset = deepcopy(WEATHER_DATASET)
    for language in get_all_languages():
        # When
        config = DEFAULT_CONFIGS.get(language)
        self.assertIsNotNone(
            config, "Missing default config for '%s'" % language)
        dataset[LANGUAGE] = language
        engine = SnipsNLUEngine(config).fit(dataset)
        parsing = engine.parse("Please give me the weather in Paris")
        # Then
        self.assertEqual(
            "SearchWeatherForecast", parsing[RES_INTENT][RES_INTENT_NAME])
def test_nlu_engine_should_train_and_parse_in_all_languages(self):
    """The NLU engine must fit and parse in every supported language."""
    # Given
    text = "brew me an espresso"
    for language in get_all_languages():
        dataset = deepcopy(BEVERAGE_DATASET)
        dataset[LANGUAGE] = language
        engine = SnipsNLUEngine()
        # When / Then
        fit_msg = "Could not fit engine in '%s'" % language
        with self.fail_if_exception(fit_msg):
            engine = engine.fit(dataset)
        parse_msg = "Could not parse in '%s'" % language
        with self.fail_if_exception(parse_msg):
            engine.parse(text)
def test_nlu_engine_should_train_and_parse_in_all_languages(self):
    """The NLU engine must fit and parse in every supported language."""
    # Given
    text = "brew me an espresso"
    for language in get_all_languages():
        dataset = deepcopy(BEVERAGE_DATASET)
        dataset[LANGUAGE] = language
        engine = SnipsNLUEngine()
        # When / Then
        fit_msg = "Could not fit engine in '%s'" % language
        with self.fail_if_exception(fit_msg):
            engine = engine.fit(dataset)
        parse_msg = "Could not parse in '%s'" % language
        with self.fail_if_exception(parse_msg):
            engine.parse(text)
def test_sample_configs_should_work(self):
    """Each language's shipped sample config must train an engine that
    parses the sample weather query to the expected intent."""
    # Given
    dataset = self.sample_dataset
    for language in get_all_languages():
        # When
        config_path = os.path.join(
            SAMPLES_PATH, "configs", "config_%s.json" % language)
        with io.open(config_path) as f:
            config = json.load(f)
        dataset[LANGUAGE] = language
        engine = SnipsNLUEngine(config).fit(dataset)
        result = engine.parse("Please give me the weather in Paris")
        # Then
        self.assertEqual(
            "sampleGetWeather", result[RES_INTENT][RES_INTENT_NAME])
def download_all_languages(*pip_args):
    """Download compatible resources for all supported languages"""
    for lang in get_all_languages():
        # Second positional argument is False here — presumably a
        # force/direct-download flag; confirm against download()'s
        # signature before changing.
        download(lang, False, *pip_args)
def __init__(self, methodName='runTest'):
    """Load the resources of every supported language before testing."""
    super(SnipsTest, self).__init__(methodName)
    for lang in get_all_languages():
        load_resources(lang)
def __init__(self, methodName='runTest'):
    """Load the resources of every supported language before testing."""
    super(SnipsTest, self).__init__(methodName)
    for lang in get_all_languages():
        load_resources(lang)
def setUp(self):
    # Resources for every supported language are loaded before each test.
    for lang in get_all_languages():
        load_resources(lang)