def test_multiword_entities(tmp_path):
    """Check that a multi-word entity's character span maps to its token range.

    The example annotates "New York City" at characters 19-32 of the text.
    After whitespace tokenization, ``MitieEntityExtractor.find_entity`` is
    expected to return token index 4 as the start and 7 as the end, i.e. one
    past the last of the three entity tokens.
    """
    data = """ { "rasa_nlu_data": { "common_examples" : [ { "text": "show me flights to New York City", "intent": "unk", "entities": [ { "entity": "destination", "start": 19, "end": 32, "value": "New York City" } ] } ] } }"""

    # Persist the training data so it goes through the regular file loader.
    training_file = tmp_path / "tmp_training_data.json"
    training_file.write_text(data, encoding=rasa.shared.utils.io.DEFAULT_ENCODING)
    loaded = load_data(str(training_file))

    entity_examples = loaded.entity_examples
    assert len(entity_examples) == 1
    message = entity_examples[0]

    annotated = message.get("entities")
    assert len(annotated) == 1

    message_tokens = WhitespaceTokenizer().tokenize(message, attribute=TEXT)
    token_span = MitieEntityExtractor.find_entity(
        annotated[0], message.get(TEXT), message_tokens
    )
    start, end = token_span
    assert start == 4
    assert end == 7
def test_repeated_entities(tmp_path):
    """Check that a repeated value is located by its offsets, not first match.

    The text contains "3" twice; the annotation points at the second
    occurrence (characters 35-36). After whitespace tokenization,
    ``MitieEntityExtractor.find_entity`` is expected to return the token
    range 9-10, which is the second "3" and not the one at token index 5.
    """
    data = """ { "rasa_nlu_data": { "common_examples" : [ { "text": "book a table today from 3 to 6 for 3 people", "intent": "unk", "entities": [ { "entity": "description", "start": 35, "end": 36, "value": "3" } ] } ] } }"""

    # Persist the training data so it goes through the regular file loader.
    training_file = tmp_path / "tmp_training_data.json"
    training_file.write_text(data, encoding=rasa.shared.utils.io.DEFAULT_ENCODING)
    loaded = load_data(str(training_file))

    entity_examples = loaded.entity_examples
    assert len(entity_examples) == 1
    message = entity_examples[0]

    annotated = message.get("entities")
    assert len(annotated) == 1

    message_tokens = WhitespaceTokenizer().tokenize(message, attribute=TEXT)
    token_span = MitieEntityExtractor.find_entity(
        annotated[0], message.get(TEXT), message_tokens
    )
    start, end = token_span
    assert start == 9
    assert end == 10
def test_multiword_entities():
    """Check that a multi-word entity's character span maps to its token range.

    The example annotates "New York City" at characters 19-32 of the text.
    After whitespace tokenization, ``MitieEntityExtractor.find_entity`` is
    expected to return token index 4 as the start and 7 as the end, i.e. one
    past the last of the three entity tokens.
    """
    # NOTE(review): a test with this same name (taking ``tmp_path``) is also
    # defined in this file; if both live at module level the later definition
    # replaces the earlier one -- confirm which one is intended to run.
    import os  # local import: only needed to build the temporary file path

    data = """ { "rasa_nlu_data": { "common_examples" : [ { "text": "show me flights to New York City", "intent": "unk", "entities": [ { "entity": "destination", "start": 19, "end": 32, "value": "New York City" } ] } ] } }"""

    # Write into a temporary directory and close the file before loading it.
    # Re-opening a still-open NamedTemporaryFile by name is not portable: it
    # works on Unix but fails on Windows.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, "tmp_training_data.json")
        with open(path, "wb") as f:
            f.write(data.encode("utf-8"))

        td = training_data.load_data(path)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1

        tokens = WhitespaceTokenizer().tokenize(example.text)
        start, end = MitieEntityExtractor.find_entity(
            entities[0], example.text, tokens
        )
        assert start == 4
        assert end == 7
def test_repeated_entities():
    """Check that a repeated value is located by its offsets, not first match.

    The text contains "3" twice; the annotation points at the second
    occurrence (characters 35-36). After whitespace tokenization,
    ``MitieEntityExtractor.find_entity`` is expected to return the token
    range 9-10, which is the second "3" and not the one at token index 5.
    """
    # NOTE(review): a test with this same name (taking ``tmp_path``) is also
    # defined in this file; if both live at module level the later definition
    # replaces the earlier one -- confirm which one is intended to run.
    import os  # local import: only needed to build the temporary file path

    data = """ { "rasa_nlu_data": { "common_examples" : [ { "text": "book a table today from 3 to 6 for 3 people", "intent": "unk", "entities": [ { "entity": "description", "start": 35, "end": 36, "value": "3" } ] } ] } }"""

    # Write into a temporary directory and close the file before loading it.
    # Re-opening a still-open NamedTemporaryFile by name is not portable: it
    # works on Unix but fails on Windows.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, "tmp_training_data.json")
        with open(path, "wb") as f:
            f.write(data.encode("utf-8"))

        td = training_data.load_data(path)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1

        tokens = WhitespaceTokenizer().tokenize(example.text)
        start, end = MitieEntityExtractor.find_entity(
            entities[0], example.text, tokens
        )
        assert start == 9
        assert end == 10
def inner(config: Dict[Text, Any], load: bool = False) -> MitieEntityExtractor:
    """Build a ``MitieEntityExtractor`` with ``config`` layered over the defaults.

    Args:
        config: Overrides merged on top of
            ``MitieEntityExtractor.get_default_config()``; keys given here
            win over the defaults.
        load: When true, the component is obtained through
            ``MitieEntityExtractor.load``; otherwise through
            ``MitieEntityExtractor.create``.

    Returns:
        Whatever the selected constructor returns.
    """
    # Both constructors are called with the same keyword arguments; the
    # storage and execution context come from the enclosing scope.
    factory = MitieEntityExtractor.load if load else MitieEntityExtractor.create
    return factory(
        model_storage=default_model_storage,
        execution_context=default_execution_context,
        resource=Resource("MitieEntityExtractor"),
        config={**MitieEntityExtractor.get_default_config(), **config},
    )