def test_deprecation_warning_logged():
    """Reading the legacy ``[LA](city:Los Angeles)`` synonym syntax warns.

    The reader is expected to emit a ``FutureWarning`` whose message starts
    with the deprecated-synonym-format notice.
    """
    legacy_markdown = """
## intent:test-intent
- I want to go to [LA](city:Los Angeles)
"""
    expected_message = (
        r"You are using the deprecated training data format to declare "
        r"synonyms.*"
    )
    reader = MarkdownReader()

    with pytest.warns(FutureWarning, match=expected_message):
        reader.reads(legacy_markdown)
def test_dump_nlu_with_responses():
    """Markdown with plain and ``chitchat/...`` intents round-trips unchanged.

    The text is parsed with ``MarkdownReader`` and dumped again through
    ``nlu_as_markdown``; the dump must equal the input exactly.
    """
    # NOTE(review): intent sections are assumed to be separated by one blank
    # line - confirm against the writer's actual output.
    source_markdown = """## intent:greet
- hey
- howdy
- hey there
- hello
- hi
- good morning
- good evening
- dear sir

## intent:chitchat/ask_name
- What's your name?
- What can I call you?

## intent:chitchat/ask_weather
- How's the weather?
- Is it too hot outside?
"""
    parsed = MarkdownReader().reads(source_markdown)

    assert parsed.nlu_as_markdown() == source_markdown
def task(source, dest, cred_file, percent):
    """Translate a sample of Rasa NLU Markdown training data to zh-TW.

    Reads ``source``, keeps the examples returned by
    ``random_select_samples`` for ``percent``, translates each example's text
    and entity values from English with ``translate.Client``, and dumps the
    result to ``dest``.

    Args:
        source: Path of the Markdown training-data file to read.
        dest: Destination handed to ``MarkdownWriter.dump``.
        cred_file: Service-account JSON file used to build the translate client.
        percent: Sampling argument forwarded to ``random_select_samples``.
    """
    # Load Rasa NLU training data.
    reader = MarkdownReader()
    with open(source, "r") as fin:
        nlu_train = reader.reads(fin.read())

    translate_client = translate.Client.from_service_account_json(cred_file)

    def trans(text):
        """Translate ``text`` and log it next to its translation."""
        translated = translate_client.translate(
            text, source_language="en", target_language="zh-TW"
        )["translatedText"]
        # Log the string that was actually translated. The previous version
        # logged the enclosing loop's ``example.text``, which is wrong for
        # entity values (it showed the already-translated sentence).
        logger.info("origin: %s, translated: %s", text, translated)
        return translated

    nlu_train.training_examples = random_select_samples(
        nlu_train.training_examples, percent
    )

    for example in nlu_train.training_examples:
        example.text = trans(example.text)
        # NOTE(review): only the entity "value" is rewritten here; any
        # start/end offsets on the entity are not adjusted to the translated
        # text - confirm whether downstream consumers rely on them.
        for entity in example.get("entities") or []:
            entity["value"] = trans(entity["value"])

    # Generate Rasa NLU translated training data.
    writer = MarkdownWriter()
    writer.dump(dest, nlu_train)
def test_markdown_entity_regex():
    """Entity annotations in Markdown examples are parsed into text and data.

    Covers a plain sentence, a ``[value](entity)`` annotation, a
    ``[text](entity:value)`` annotation, and an annotation whose entity name
    and value contain punctuation and regex metacharacters.
    """
    markdown = """
## intent:restaurant_search
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town
- show me [chines](cuisine:chinese) restaurants
- show me [chines](22_ab-34*3.A:43er*+?df) restaurants
"""
    # (expected text, expected data) for each example, in file order.
    expectations = [
        (
            "i'm looking for a place to eat",
            {"intent": "restaurant_search"},
        ),
        (
            "i'm looking for a place in the north of town",
            {
                "intent": "restaurant_search",
                "entities": [
                    {
                        "start": 31,
                        "end": 36,
                        "value": "north",
                        "entity": "loc-direction",
                    }
                ],
            },
        ),
        (
            "show me chines restaurants",
            {
                "intent": "restaurant_search",
                "entities": [
                    {
                        "start": 8,
                        "end": 14,
                        "value": "chinese",
                        "entity": "cuisine",
                    }
                ],
            },
        ),
        (
            "show me chines restaurants",
            {
                "intent": "restaurant_search",
                "entities": [
                    {
                        "start": 8,
                        "end": 14,
                        "value": "43er*+?df",
                        "entity": "22_ab-34*3.A",
                    }
                ],
            },
        ),
    ]

    examples = MarkdownReader().reads(markdown).training_examples
    assert len(examples) == 4

    for parsed, (expected_text, expected_data) in zip(examples, expectations):
        assert parsed.data == expected_data
        assert parsed.text == expected_text
def test_check_check_correct_entity_annotations(text: Text, warnings: int):
    """Misaligned entity annotations produce the expected user warnings.

    Args:
        text: Markdown training data handed to ``MarkdownReader.reads``.
        warnings: Number of ``UserWarning`` records expected from
            ``EntityExtractor.check_correct_entity_annotations``.
    """
    reader = MarkdownReader()
    tokenizer = WhitespaceTokenizer()

    training_data = reader.reads(text)
    tokenizer.train(training_data)

    with pytest.warns(UserWarning) as record:
        EntityExtractor.check_correct_entity_annotations(training_data)

    assert len(record) == warnings

    # Test each excerpt directly. The previous form wrapped the membership
    # result in a one-element list, which is always truthy, so the assertion
    # could never fail.
    first_message = record[0].message.args[0]
    assert all(
        excerpt in first_message
        for excerpt in ["Misaligned entity annotation in sentence"]
    )
def test_markdown_order():
    """Intent sections keep their original order through a read/dump cycle.

    Intent ``z`` is declared before intent ``a``; the dumped Markdown must
    equal the input, so the sections must not be re-sorted.
    """
    # NOTE(review): the two sections are assumed to be separated by one blank
    # line - confirm against the writer's actual output.
    source_markdown = """## intent:z
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town

## intent:a
- intent a
- also very important
"""
    reader = MarkdownReader()
    parsed = reader.reads(source_markdown)
    dumped = parsed.nlu_as_markdown()

    assert dumped == source_markdown
def test_markdown_entity_regex(example: Text, expected_num_entities: int):
    """A single example line yields the expected number of entities.

    Args:
        example: Example sentence placed under the ``test-intent`` section.
        expected_num_entities: Number of entities the reader should extract
            (a missing ``entities`` key counts as zero).
    """
    markdown = f"""
## intent:test-intent
- {example}
"""
    examples = MarkdownReader().reads(markdown).training_examples
    assert len(examples) == 1

    parsed = examples[0]
    assert parsed.data["intent"] == "test-intent"

    entities = parsed.data.get("entities", [])
    assert len(entities) == expected_num_entities
def test_markdown_unespace_tokens():
    """An example containing literal ``\\t`` and ``\\n`` tokens still parses.

    The sentence carries backslash-t / backslash-n character pairs (not real
    control characters) plus one ``[Alexandria]{"entity": "city"}``
    annotation; exactly one example with one entity is expected.
    """
    markdown = """## intent:test-intent
- Hi \\t Can you help me?\\n I want to go to [Alexandria]{"entity": "city"}
"""
    entity_count = 1

    reader = MarkdownReader()
    examples = reader.reads(markdown).training_examples
    assert len(examples) == 1

    parsed = examples[0]
    assert parsed.data["intent"] == "test-intent"
    assert len(parsed.data.get("entities", [])) == entity_count
def test_markdown_entity_regex():
    """Markdown entity annotations are turned into offsets, values and names.

    Four examples under ``restaurant_search`` are read: one without
    entities, one ``[value](entity)``, one ``[text](entity:value)``, and one
    whose entity name and value contain regex metacharacters.
    """

    def with_entity(start, end, value, entity):
        """Build the expected ``data`` dict for an example with one entity."""
        return {
            "intent": "restaurant_search",
            "entities": [
                {"start": start, "end": end, "value": value, "entity": entity}
            ],
        }

    reader = MarkdownReader()
    markdown = """
## intent:restaurant_search
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town
- show me [chines](cuisine:chinese) restaurants
- show me [chines](22_ab-34*3.A:43er*+?df) restaurants
"""
    parsed = reader.reads(markdown)
    assert len(parsed.training_examples) == 4

    plain, direction, cuisine, odd_name = parsed.training_examples

    assert plain.data == {"intent": "restaurant_search"}
    assert plain.text == "i'm looking for a place to eat"

    assert direction.data == with_entity(31, 36, "north", "loc-direction")
    assert direction.text == "i'm looking for a place in the north of town"

    assert cuisine.data == with_entity(8, 14, "chinese", "cuisine")
    assert cuisine.text == "show me chines restaurants"

    assert odd_name.data == with_entity(8, 14, "43er*+?df", "22_ab-34*3.A")
    assert odd_name.text == "show me chines restaurants"
def test_markdown_entity_regex(
    example: Text,
    expected_entities: Optional[List[Dict[Text, Any]]],
    expected_text: Text,
):
    """A single example line parses to the expected entities and plain text.

    Args:
        example: Example sentence placed under the ``test-intent`` section.
        expected_entities: Value expected from ``data.get("entities")``;
            ``None`` when the example should carry no ``entities`` key.
        expected_text: Text expected on the parsed example.
    """
    markdown = f"""
## intent:test-intent
- {example}
"""
    reader = MarkdownReader()
    examples = reader.reads(markdown).training_examples
    assert len(examples) == 1

    parsed = examples[0]
    assert parsed.data["intent"] == "test-intent"
    assert parsed.data.get("entities") == expected_entities
    assert parsed.text == expected_text