def _get_validated_dict(json_str: Text) -> Dict[Text, Text]:
    """Parse and validate the attribute dict attached to an annotated entity.

    An entity annotation may carry a dict describing its role, synonym or
    group, e.g. [LA]{"entity": "city", "role": "to", "value": "Los Angeles"}

    Args:
        json_str: the content of the entity dict as a string, without the
            surrounding "{}".

    Raises:
        ValidationError: if the parsed dict fails validation against the
            entity dict schema.
        JSONDecodeError: if the braced string is not valid json. A warning
            pointing to the training data docs is emitted before re-raising.

    Returns:
        The parsed and validated entity attributes as a python dict.
    """
    import json
    import rasa.utils.validation as validation_utils
    import rasa.nlu.schemas.data_schema as schema

    # The surrounding braces are not part of the regex match, so they are
    # restored here before the string is handed to the json parser.
    braced = f"{{{json_str}}}"

    try:
        entity_attributes = json.loads(braced)
    except JSONDecodeError as decode_error:
        raise_warning(
            f"Incorrect training data format ('{braced}'), make sure your "
            "data is valid. For more information about the format visit "
            f"{DOCS_URL_TRAINING_DATA_NLU}."
        )
        raise decode_error

    validation_utils.validate_training_data(
        entity_attributes, schema.entity_dict_schema()
    )
    return entity_attributes
def test_url_data_format():
    """Round-trip a small NLU payload through a temp file and validate it.

    The json text is encoded, written to a temporary file, read back with
    ``io_utils.read_json_file`` and then checked against the rasa NLU data
    schema.
    """
    # Adjacent literals are concatenated by the parser into a single string.
    raw_json = (
        ' { "rasa_nlu_data": { "entity_synonyms": [ '
        '{ "value": "nyc", "synonyms": ["New York City", "nyc", "the big apple"] } ], '
        '"common_examples" : [ { "text": "show me flights to New York City", '
        '"intent": "unk", "entities": [ { "entity": "destination", "start": 19, '
        '"end": 32, "value": "NYC" } ] } ] } }'
    )

    encoded_payload = raw_json.encode(io_utils.DEFAULT_ENCODING)
    tmp_path = io_utils.create_temporary_file(
        encoded_payload, suffix="_tmp_training_data.json", mode="w+b"
    )

    loaded = io_utils.read_json_file(tmp_path)
    assert loaded is not None

    nlu_schema = schema.rasa_nlu_data_schema()
    validation_utils.validate_training_data(loaded, nlu_schema)
def read_from_json(self, js: Dict[Text, Any], **_) -> "TrainingData":
    """Build a ``TrainingData`` object from a dict in the rasa NLU data format.

    Args:
        js: the parsed json document; its content is read from the
            "rasa_nlu_data" key after schema validation.
        **_: additional keyword arguments, accepted and ignored.

    Returns:
        The training data assembled from the examples, entity synonyms,
        regex features, lookup tables and gazette found in ``js``.
    """
    from rasa.nlu.training_data import Message, TrainingData
    import rasa.nlu.schemas.data_schema as schema
    import rasa.utils.validation as validation_utils

    validation_utils.validate_training_data(js, schema.rasa_nlu_data_schema())

    payload = js["rasa_nlu_data"]

    # Every section is optional and falls back to an empty list.
    common = payload.get("common_examples", [])
    by_intent = payload.get("intent_examples", [])
    by_entity = payload.get("entity_examples", [])
    regexes = payload.get("regex_features", [])
    lookups = payload.get("lookup_tables", [])
    gazette_entries = payload.get("gazette", [])
    synonyms = transform_entity_synonyms(payload.get("entity_synonyms", []))

    # The two legacy sections are still loaded, but their use is flagged.
    if intent_examples_present := bool(by_intent or by_entity):
        raise_warning(
            "Your rasa data contains 'intent_examples' or 'entity_examples' "
            "which will be removed in the future. Consider putting all your "
            "examples into the 'common_examples' section.",
            FutureWarning,
            docs=DOCS_URL_TRAINING_DATA_NLU,
        )

    messages = [
        Message.build(example["text"], example.get("intent"), example.get("entities"))
        for example in common + by_intent + by_entity
    ]

    return TrainingData(messages, synonyms, regexes, lookups, gazette_entries)
def read_from_json(self, js: Dict[Text, Any], **_) -> "TrainingData":
    """Build a ``TrainingData`` object from a dict in the rasa NLU data format.

    Args:
        js: the parsed json document; its content is read from the
            "rasa_nlu_data" key after schema validation.
        **_: additional keyword arguments, accepted and ignored.

    Returns:
        The training data assembled from the common examples, entity
        synonyms, regex features and lookup tables found in ``js``.
    """
    from rasa.nlu.training_data import Message, TrainingData
    import rasa.nlu.schemas.data_schema as schema
    import rasa.utils.validation as validation_utils

    validation_utils.validate_training_data(js, schema.rasa_nlu_data_schema())

    payload = js["rasa_nlu_data"]

    # Every section is optional and falls back to an empty list.
    examples = payload.get("common_examples", [])
    regexes = payload.get("regex_features", [])
    lookups = payload.get("lookup_tables", [])
    synonyms = transform_entity_synonyms(payload.get("entity_synonyms", []))

    # Each example dict is expanded into keyword arguments of Message.build.
    messages = [Message.build(**example) for example in examples]

    return TrainingData(messages, synonyms, regexes, lookups)
def test_validate_training_data_is_throwing_exceptions(invalid_data):
    """Validating ``invalid_data`` against the NLU data schema must raise.

    The test passes only if a ``ValidationError`` is raised inside the
    ``pytest.raises`` block.
    """
    with pytest.raises(ValidationError):
        nlu_schema = schema.rasa_nlu_data_schema()
        validation_utils.validate_training_data(invalid_data, nlu_schema)
def test_example_training_data_is_valid():
    """The demo json file must validate against the rasa NLU data schema."""
    example_payload = io_utils.read_json_file("data/examples/rasa/demo-rasa.json")
    nlu_schema = schema.rasa_nlu_data_schema()
    validation_utils.validate_training_data(example_payload, nlu_schema)
def test_entity_dict_is_valid(data):
    """Validating ``data`` against the entity dict schema must not raise."""
    entity_schema = schema.entity_dict_schema()
    validation_utils.validate_training_data(data, entity_schema)
def test_validate_entity_dict_is_throwing_exceptions(invalid_data):
    """Validating ``invalid_data`` against the entity dict schema must raise.

    The test passes only if a ``ValidationError`` is raised inside the
    ``pytest.raises`` block.
    """
    with pytest.raises(ValidationError):
        entity_schema = schema.entity_dict_schema()
        validation_utils.validate_training_data(invalid_data, entity_schema)