def read_from_json(self, js: Dict[Text, Any], **_: Any) -> "TrainingData":
    """Loads training data stored in the rasa NLU data format.

    The document is validated against the rasa NLU data schema first; any
    error raised by that validation propagates to the caller.

    Args:
        js: Parsed JSON document with a top-level `rasa_nlu_data` key. The
            optional sub-keys `common_examples`, `entity_synonyms`,
            `regex_features` and `lookup_tables` default to empty lists.
            The document is not modified by this method.
        **_: Additional keyword arguments; accepted and ignored.

    Returns:
        A `TrainingData` built from the examples, transformed entity
        synonyms, regex features and lookup tables found in `js`.
    """
    import rasa.shared.nlu.training_data.schemas.data_schema as schema
    import rasa.shared.utils.validation as validation_utils

    validation_utils.validate_training_data(js, schema.rasa_nlu_data_schema())

    data = js["rasa_nlu_data"]
    common_examples = data.get("common_examples", [])
    entity_synonyms = data.get("entity_synonyms", [])
    regex_features = data.get("regex_features", [])
    lookup_tables = data.get("lookup_tables", [])

    entity_synonyms = transform_entity_synonyms(entity_synonyms)

    training_examples = []
    for ex in common_examples:
        # Pop from a shallow copy: popping from `ex` itself would strip the
        # text/intent/entities out of the caller's `js`, so reading the same
        # document a second time would silently produce empty messages.
        remaining = dict(ex)
        # Whatever is left after the three pops is a custom entry and is
        # forwarded to the message unchanged.
        msg = Message.build(
            text=remaining.pop(TEXT, ""),
            intent=remaining.pop(INTENT, None),
            entities=remaining.pop(ENTITIES, None),
            **remaining,
        )
        training_examples.append(msg)

    return TrainingData(
        training_examples, entity_synonyms, regex_features, lookup_tables
    )
def _get_validated_dict(json_str: Text) -> Dict[Text, Text]:
    """Parses the attribute dict of an inline entity annotation and validates it.

    Entity roles, synonyms and groups can be given as a dict next to the
    entity text, e.g. [LA]{"entity": "city", "role": "to", "value": "Los Angeles"}

    Args:
        json_str: The content of the entity dict as a string, without the
            enclosing "{}".

    Raises:
        JSONDecodeError: If `json_str` wrapped in braces is not valid json.
            A warning is emitted before the error is re-raised.
        Any error raised while validating the parsed dict against the entity
        dict schema propagates unchanged.

    Returns:
        The parsed and validated python dict.
    """
    import json
    import rasa.shared.utils.validation as validation_utils
    import rasa.shared.nlu.training_data.schemas.data_schema as schema

    # The surrounding braces are not part of the regex match; restore them
    # so the string is a complete json object.
    wrapped = f"{{{json_str}}}"

    try:
        entity_dict = json.loads(wrapped)
    except JSONDecodeError as error:
        warning_text = (
            f"Incorrect training data format ('{{{json_str}}}'), make sure your "
            f"data is valid. For more information about the format visit "
            f"{LEGACY_DOCS_BASE_URL}/nlu/training-data-format/."
        )
        rasa.shared.utils.io.raise_warning(warning_text)
        raise error

    validation_utils.validate_training_data(entity_dict, schema.entity_dict_schema())
    return entity_dict
def test_url_data_format():
    """Writes a small NLU json document to a temp file, reads it back and
    validates the result against the rasa NLU data schema."""
    raw_json = """
    {
      "rasa_nlu_data": {
        "entity_synonyms": [
          {
            "value": "nyc",
            "synonyms": ["New York City", "nyc", "the big apple"]
          }
        ],
        "common_examples" : [
          {
            "text": "show me flights to New York City",
            "intent": "unk",
            "entities": [
              {
                "entity": "destination",
                "start": 19,
                "end": 32,
                "value": "NYC"
              }
            ]
          }
        ]
      }
    }"""
    # The temp file is opened in binary mode ("w+b"), so hand it bytes.
    encoded = raw_json.encode(rasa.shared.utils.io.DEFAULT_ENCODING)
    tmp_path = io_utils.create_temporary_file(
        encoded, suffix="_tmp_training_data.json", mode="w+b"
    )

    loaded = rasa.shared.utils.io.read_json_file(tmp_path)

    assert loaded is not None
    validation_utils.validate_training_data(loaded, schema.rasa_nlu_data_schema())
def get_validated_dict(json_str: Text) -> Dict[Text, Text]:
    """Deserializes the attribute dict of an inline entity annotation and validates it.

    Entity roles, synonyms and groups can be given as a dict next to the
    entity text, e.g. [LA]{"entity": "city", "role": "to", "value": "Los Angeles"}.

    Args:
        json_str: The content of the entity dict as a string, without the
            enclosing "{}".

    Raises:
        JSONDecodeError: If `json_str` wrapped in braces is not valid json.
            A warning is emitted before the error is re-raised.
        Any error raised while validating the parsed dict against the entity
        dict schema propagates unchanged.

    Returns:
        Deserialized and validated `json_str`.
    """
    import json
    import rasa.shared.utils.validation as validation_utils
    import rasa.shared.nlu.training_data.schemas.data_schema as schema

    # The surrounding braces are not part of the regex match; restore them
    # so the string is a complete json object.
    wrapped = f"{{{json_str}}}"

    try:
        entity_dict = json.loads(wrapped)
    except JSONDecodeError as error:
        warning_text = (
            f"Incorrect training data format ('{{{json_str}}}'). Make sure your "
            f"data is valid."
        )
        rasa.shared.utils.io.raise_warning(
            warning_text, docs=DOCS_URL_TRAINING_DATA_NLU
        )
        raise error

    validation_utils.validate_training_data(entity_dict, schema.entity_dict_schema())
    return entity_dict
def test_validate_training_data_is_throwing_exceptions(invalid_data):
    """Validating `invalid_data` against the rasa NLU data schema must raise
    `SchemaValidationError`.

    `invalid_data` is supplied from outside this function (presumably a
    fixture or parametrization defined elsewhere in the test module).
    """
    validate = validation_utils.validate_training_data
    with pytest.raises(SchemaValidationError):
        validate(invalid_data, schema.rasa_nlu_data_schema())
def test_example_training_data_is_valid():
    """The json file at data/examples/rasa/demo-rasa.json must pass validation
    against the rasa NLU data schema."""
    demo_path = "data/examples/rasa/demo-rasa.json"
    demo_content = rasa.shared.utils.io.read_json_file(demo_path)

    nlu_schema = schema.rasa_nlu_data_schema()
    validation_utils.validate_training_data(demo_content, nlu_schema)
def test_entity_dict_is_valid(data):
    """Validating `data` against the entity dict schema must not raise.

    `data` is supplied from outside this function (presumably a fixture or
    parametrization defined elsewhere in the test module).
    """
    entity_schema = schema.entity_dict_schema()
    validation_utils.validate_training_data(data, entity_schema)
def test_validate_entity_dict_is_throwing_exceptions(invalid_data):
    """Validating `invalid_data` against the entity dict schema must raise
    `SchemaValidationError`.

    `invalid_data` is supplied from outside this function (presumably a
    fixture or parametrization defined elsewhere in the test module).
    """
    validate = validation_utils.validate_training_data
    with pytest.raises(SchemaValidationError):
        validate(invalid_data, schema.entity_dict_schema())