def test_dump_nlu_with_responses():
    """Round-trip check: markdown with retrieval intents survives read + dump."""
    source_md = """## intent:greet
- hey
- howdy
- hey there
- hello
- hi
- good morning
- good evening
- dear sir

## intent:chitchat/ask_name
- What's your name?
- What can I call you?

## intent:chitchat/ask_weather
- How's the weather?
- Is it too hot outside?
"""

    parsed = MarkdownReader().reads(source_md)
    round_tripped = parsed.nlu_as_markdown()

    assert round_tripped == source_md
def test_check_check_correct_entity_annotations(text: Text, warnings: int):
    """Misaligned entity annotations should raise the expected `UserWarning`s.

    Args:
        text: markdown NLU training data to parse.
        warnings: number of warnings expected to be recorded.
    """
    reader = MarkdownReader()
    tokenizer = WhitespaceTokenizer()

    training_data = reader.reads(text)
    tokenizer.train(training_data)

    with pytest.warns(UserWarning) as record:
        EntityExtractor.check_correct_entity_annotations(training_data)

    assert len(record) == warnings
    # Bug fix: the original wrapped the membership test in a one-element
    # list — `all([...] for ...)` — so `all()` saw only non-empty lists and
    # the assertion could never fail. Assert the substring check directly.
    assert all(
        excerpt in record[0].message.args[0]
        for excerpt in ["Misaligned entity annotation in sentence"]
    )
def test_markdown_order():
    """Section order in the source markdown must be preserved on dump."""
    reader = MarkdownReader()
    original = """## intent:z
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town

## intent:a
- intent a
- also very important
"""

    data = reader.reads(original)

    assert data.nlu_as_markdown() == original
def test_markdown_entity_regex(example: Text, expected_num_entities: int):
    """Entity annotations in a single example are parsed in the expected count.

    Args:
        example: a single markdown training example line.
        expected_num_entities: how many entities should be extracted from it.
    """
    reader = MarkdownReader()
    markdown = f"""
## intent:test-intent
- {example}
"""

    parsed = reader.reads(markdown)
    assert len(parsed.training_examples) == 1

    example_message = parsed.training_examples[0]
    assert example_message.data["intent"] == "test-intent"

    entities = example_message.data.get("entities", [])
    assert len(entities) == expected_num_entities
def test_markdown_unespace_tokens():
    """Escaped whitespace tokens (\\t, \\n) do not break entity parsing."""
    reader = MarkdownReader()
    markdown = """## intent:test-intent
- Hi \\t Can you help me?\\n I want to go to [Alexandria]{"entity": "city"}
"""
    expected_num_entities = 1

    parsed = reader.reads(markdown)
    assert len(parsed.training_examples) == 1

    example_message = parsed.training_examples[0]
    assert example_message.data["intent"] == "test-intent"
    assert len(example_message.data.get("entities", [])) == expected_num_entities
def _reader_factory(fformat: Text) -> Optional["TrainingDataReader"]:
    """Generates the appropriate reader class based on the file format."""
    from rasa.shared.nlu.training_data.formats import (
        RasaYAMLReader,
        MarkdownReader,
        WitReader,
        LuisReader,
        RasaReader,
        DialogflowReader,
        NLGMarkdownReader,
    )

    # Dialogflow covers several related formats, so it is matched by
    # membership rather than equality.
    if fformat in DIALOGFLOW_RELEVANT:
        return DialogflowReader()

    # Every other format maps one-to-one onto a reader class.
    readers_by_format = {
        LUIS: LuisReader,
        WIT: WitReader,
        RASA: RasaReader,
        MARKDOWN: MarkdownReader,
        MARKDOWN_NLG: NLGMarkdownReader,
        RASA_YAML: RasaYAMLReader,
    }

    reader_class = readers_by_format.get(fformat)
    return reader_class() if reader_class is not None else None
def guess_format(filename: Text) -> Text:
    """Applies heuristics to guess the data format of a file.

    Args:
        filename: file whose type should be guessed

    Returns:
        Guessed file format.
    """
    from rasa.shared.nlu.training_data.formats import RasaYAMLReader

    guess = UNK

    if not os.path.isfile(filename):
        return guess

    try:
        content = rasa.shared.utils.io.read_file(filename)
        js = json.loads(content)
    except ValueError:
        # Not valid JSON — fall back to the text-based format sniffers.
        if MarkdownReader.is_markdown_nlu_file(filename):
            guess = MARKDOWN
        elif NLGMarkdownReader.is_markdown_nlg_file(filename):
            guess = MARKDOWN_NLG
        elif RasaYAMLReader.is_yaml_nlu_file(filename):
            guess = RASA_YAML
    else:
        # Valid JSON — probe each JSON-format heuristic in turn.
        for file_format, format_heuristic in _json_format_heuristics.items():
            if format_heuristic(js, filename):
                guess = file_format
                break

    # Bug fix: the message previously interpolated the literal placeholder
    # "(unknown)" instead of the actual file name being inspected.
    logger.debug(f"Training data format of '{filename}' is '{guess}'.")

    return guess
def filter(cls, source_path: Path) -> bool:
    """Checks whether `source_path` contains convertible Markdown NLU data.

    Args:
        source_path: Path to the training data file.

    Returns:
        `True` if the given file can be converted, `False` otherwise
    """
    is_convertible = MarkdownReader.is_markdown_nlu_file(source_path)
    return is_convertible
def convert_and_write(cls, source_path: Path, output_path: Path) -> None:
    """Converts the given training data file and saves it to the output directory.

    Args:
        source_path: Path to the training data file.
        output_path: Path to the output directory.
    """
    converted_file_path = cls.generate_path_for_converted_training_data_file(
        source_path, output_path
    )

    # Read the Markdown source and re-serialize it as YAML.
    training_data = MarkdownReader().read(source_path)
    RasaYAMLWriter().dump(converted_file_path, training_data)

    # Lookup tables go into their own files next to the main output.
    for lookup_table in training_data.lookup_tables:
        cls._write_nlu_lookup_table_yaml(lookup_table, output_path)

    print_success(
        f"Converted NLU file: '{source_path}' >> '{converted_file_path}'."
    )
def test_skip_markdown_reading_deprecation():
    """`ignore_deprecation_warning=True` must suppress the deprecation warning."""
    import warnings

    # Bug fix: `pytest.warns(None)` is deprecated and removed in pytest 8.
    # Record warnings with the stdlib instead and assert none were emitted.
    with warnings.catch_warnings(record=True) as recorded:
        warnings.simplefilter("always")
        MarkdownReader(ignore_deprecation_warning=True)

    assert not recorded
def test_markdown_reading_deprecation():
    """Instantiating `MarkdownReader` without opting out warns about deprecation."""
    expected_warning = FutureWarning

    with pytest.warns(expected_warning):
        MarkdownReader()