def read_from_json(self, js: Dict[Text, Any], **_: Any) -> "TrainingData":
    """Loads training data stored in the rasa NLU data format.

    Args:
        js: Parsed JSON payload with a top-level ``rasa_nlu_data`` key.
        **_: Ignored keyword arguments (kept for interface compatibility
            with other training-data readers).

    Returns:
        A ``TrainingData`` built from the common examples, entity synonyms,
        regex features and lookup tables found in ``js``.

    Raises:
        Whatever ``validate_training_data`` raises when ``js`` does not
        conform to the rasa NLU data schema.
    """
    import rasa.shared.nlu.training_data.schemas.data_schema as schema
    import rasa.shared.utils.validation as validation_utils

    validation_utils.validate_training_data(js, schema.rasa_nlu_data_schema())

    data = js["rasa_nlu_data"]
    common_examples = data.get("common_examples", [])
    entity_synonyms = data.get("entity_synonyms", [])
    regex_features = data.get("regex_features", [])
    lookup_tables = data.get("lookup_tables", [])

    entity_synonyms = transform_entity_synonyms(entity_synonyms)

    training_examples = []
    for ex in common_examples:
        # Copy before popping so the caller's `js` structure is not mutated
        # as a side effect of reading it.
        ex = dict(ex)
        # Whatever keys remain after popping the known ones are custom
        # entries, forwarded verbatim to the message.
        msg = Message.build(
            text=ex.pop(TEXT, ""),
            intent=ex.pop(INTENT, None),
            entities=ex.pop(ENTITIES, None),
            **ex,
        )
        training_examples.append(msg)

    return TrainingData(
        training_examples, entity_synonyms, regex_features, lookup_tables
    )
def test_url_data_format():
    """JSON training data written to a temp file re-reads and validates."""
    payload = """
    {
      "rasa_nlu_data": {
        "entity_synonyms": [
          {
            "value": "nyc",
            "synonyms": ["New York City", "nyc", "the big apple"]
          }
        ],
        "common_examples" : [
          {
            "text": "show me flights to New York City",
            "intent": "unk",
            "entities": [
              {
                "entity": "destination",
                "start": 19,
                "end": 32,
                "value": "NYC"
              }
            ]
          }
        ]
      }
    }"""
    temp_path = io_utils.create_temporary_file(
        payload.encode(rasa.shared.utils.io.DEFAULT_ENCODING),
        suffix="_tmp_training_data.json",
        mode="w+b",
    )

    parsed = rasa.shared.utils.io.read_json_file(temp_path)
    assert parsed is not None
    validation_utils.validate_training_data(parsed, schema.rasa_nlu_data_schema())
def test_validate_training_data_is_throwing_exceptions(invalid_data):
    """Schema validation rejects malformed training-data payloads."""
    nlu_schema = schema.rasa_nlu_data_schema()
    with pytest.raises(SchemaValidationError):
        validation_utils.validate_training_data(invalid_data, nlu_schema)
def test_example_training_data_is_valid():
    """The bundled demo training data conforms to the rasa NLU schema."""
    demo_file = "data/examples/rasa/demo-rasa.json"
    validation_utils.validate_training_data(
        rasa.shared.utils.io.read_json_file(demo_file),
        schema.rasa_nlu_data_schema(),
    )