def task(source, dest, cred_file, percent):
    """Translate a sample of Rasa NLU Markdown training data and write it out.

    The Markdown file at ``source`` is parsed, its examples are reduced by
    ``random_select_samples(examples, percent)``, and the text and entity
    values of every remaining example are translated from ``en`` to
    ``zh-TW`` before the data is dumped to ``dest``.

    Args:
        source: Path of the Markdown NLU file to read.
        dest: Path handed to ``MarkdownWriter.dump`` for the translated data.
        cred_file: Service-account JSON file used to build the translate
            client.
        percent: Forwarded unchanged to ``random_select_samples``.
    """
    # load Rasa NLU training data
    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(source, "r", encoding="utf-8") as fin:
        nlu = fin.read()

    nlu_train = MarkdownReader().reads(nlu)

    translate_client = translate.Client.from_service_account_json(cred_file)

    def trans(text):
        """Translate ``text`` from English to zh-TW and log the pair."""
        translated = translate_client.translate(
            text,
            source_language="en",
            target_language="zh-TW")['translatedText']
        # Log the string that was actually translated; reading the enclosing
        # loop's ``example.text`` here reported the wrong origin whenever an
        # entity value was being translated.
        logger.info(u'origin: %s, translated: %s', text, translated)
        return translated

    nlu_train.training_examples = random_select_samples(
        nlu_train.training_examples, percent)
    for example in nlu_train.training_examples:
        example.text = trans(example.text)
        # NOTE(review): only the entity values are translated; any start/end
        # offsets stored on the entities are left as they were - confirm the
        # writer does not rely on them.
        for entity in example.get("entities") or []:
            entity["value"] = trans(entity['value'])

    # Generate Rasa NLU translated training data
    w = MarkdownWriter()
    w.dump(dest, nlu_train)
Example #2
0
def test_dump_nlu_with_responses():
    """Markdown with ``chitchat/...`` intents must dump back unchanged."""
    markdown_source = """## intent:greet
- hey
- howdy
- hey there
- hello
- hi
- good morning
- good evening
- dear sir

## intent:chitchat/ask_name
- What's your name?
- What can I call you?

## intent:chitchat/ask_weather
- How's the weather?
- Is it too hot outside?
"""

    parsed = MarkdownReader().reads(markdown_source)

    # Round trip: what was read is exactly what gets written.
    assert parsed.nlu_as_markdown() == markdown_source
Example #3
0
def _write_nlu_yaml(training_data_path: Path, output_path: Path,
                    source_path: Path) -> None:
    """Read a Markdown NLU file and dump it through ``RasaYAMLWriter``.

    Args:
        training_data_path: File handed to ``MarkdownReader.read``.
        output_path: Destination handed to ``RasaYAMLWriter.dump``.
        source_path: Path shown in the success message only.
    """
    md_reader, yaml_writer = MarkdownReader(), RasaYAMLWriter()

    yaml_writer.dump(output_path, md_reader.read(training_data_path))

    print_success(f"Converted NLU file: '{source_path}' >> '{output_path}'.")
Example #4
0
def test_markdown_entity_regex():
    """Check text and parsed data of four ``[value](entity)`` examples."""
    md = """
## intent:restaurant_search
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town
- show me [chines](cuisine:chinese) restaurants
- show me [chines](22_ab-34*3.A:43er*+?df) restaurants
    """

    # One (expected data, expected text) pair per example, in file order.
    expectations = [
        (
            {"intent": "restaurant_search"},
            "i'm looking for a place to eat",
        ),
        (
            {
                "intent": "restaurant_search",
                "entities": [
                    {"start": 31, "end": 36,
                     "value": "north", "entity": "loc-direction"},
                ],
            },
            "i'm looking for a place in the north of town",
        ),
        (
            {
                "intent": "restaurant_search",
                "entities": [
                    {"start": 8, "end": 14,
                     "value": "chinese", "entity": "cuisine"},
                ],
            },
            "show me chines restaurants",
        ),
        (
            {
                "intent": "restaurant_search",
                "entities": [
                    {"start": 8, "end": 14,
                     "value": "43er*+?df", "entity": "22_ab-34*3.A"},
                ],
            },
            "show me chines restaurants",
        ),
    ]

    result = MarkdownReader().reads(md)

    assert len(result.training_examples) == 4
    for position, (expected_data, expected_text) in enumerate(expectations):
        parsed = result.training_examples[position]
        assert parsed.data == expected_data
        assert parsed.text == expected_text
Example #5
0
def read_inputs_md(input_path):
    """Read a Markdown NLU file and return ``(texts, intents)`` lists.

    The two lists are parallel: element ``i`` of the second list is the
    ``'intent'`` value of the message whose text is element ``i`` of the
    first.
    """
    reader = MarkdownReader()
    reader.read(input_path, language='de', fformat='MARKDOWN')

    # Walk the examples once, then split the pairs into two columns.
    pairs = [(message.text, message.get('intent'))
             for message in reader.training_examples]
    texts = [text for text, _ in pairs]
    cats = [intent for _, intent in pairs]

    return texts, cats
Example #6
0
def test_check_check_correct_entity_annotations(text: Text, warnings: int):
    """Check the warnings raised for misaligned entity annotations.

    Args:
        text: Markdown training data to parse and tokenize.
        warnings: Number of ``UserWarning`` records expected.
    """
    reader = MarkdownReader()
    tokenizer = WhitespaceTokenizer()

    training_data = reader.reads(text)
    tokenizer.train(training_data)

    with pytest.warns(UserWarning) as record:
        EntityExtractor.check_correct_entity_annotations(training_data)

    assert len(record) == warnings
    # Each excerpt must be tested directly: wrapping the condition in a
    # one-element list (``[cond]``) is always truthy, which made this
    # assertion impossible to fail.
    assert all(
        excerpt in record[0].message.args[0]
        for excerpt in ["Misaligned entity annotation in sentence"]
    )
Example #7
0
def test_markdown_entity_regex(example: Text, expected_num_entities: int):
    """Parse a single example line and count the entities found in it."""
    md = f"""
## intent:test-intent
- {example}
    """

    parsed_examples = MarkdownReader().reads(md).training_examples

    assert len(parsed_examples) == 1
    only_example = parsed_examples[0]
    assert only_example.data["intent"] == "test-intent"
    # A message without entities has no "entities" key, hence the default.
    found_entities = only_example.data.get("entities", [])
    assert len(found_entities) == expected_num_entities
Example #8
0
def test_markdown_order():
    """Intent sections must be dumped in the order they were read."""
    # "z" is deliberately listed before "a".
    source_md = """## intent:z
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town

## intent:a
- intent a
- also very important
"""

    dumped = MarkdownReader().reads(source_md).nlu_as_markdown()
    assert dumped == source_md
Example #9
0
def test_deprecation_warning_logged():
    """Reading a ``[LA](city:Los Angeles)`` annotation emits a FutureWarning."""
    md = """
## intent:test-intent
- I want to go to [LA](city:Los Angeles)
    """
    expected_warning = (
        r"You are using the deprecated training data format to declare "
        r"synonyms.*"
    )

    reader = MarkdownReader()
    with pytest.warns(FutureWarning, match=expected_warning):
        reader.reads(md)
Example #10
0
def test_markdown_unespace_tokens():
    """An example containing literal ``\\t`` / ``\\n`` still yields its entity."""
    md = """## intent:test-intent
- Hi \\t Can you help me?\\n I want to go to [Alexandria]{"entity": "city"}
"""
    expected_num_entities = 1

    examples = MarkdownReader().reads(md).training_examples
    assert len(examples) == 1

    parsed = examples[0]
    assert parsed.data["intent"] == "test-intent"
    entity_count = len(parsed.data.get("entities", []))
    assert entity_count == expected_num_entities
Example #11
0
File: data.py Project: sysang/rasa
def _convert_to_yaml(args: argparse.Namespace, is_nlu: bool) -> None:
    """Convert Markdown training data to YAML files.

    ``args.data`` may be a directory, whose direct entries are each
    considered, or a single file. Every converted file is written into
    ``args.out`` under the source file's stem plus ``CONVERTED_FILE_SUFFIX``.

    Args:
        args: Parsed CLI arguments; ``args.out`` and ``args.data`` are read.
        is_nlu: When true, only Markdown NLU files are converted. When
            false, Markdown story files are converted and NLU files are
            skipped without a warning.
    """
    output = Path(args.out)
    if not os.path.exists(output):
        print_error_and_exit(
            f"The output path '{output}' doesn't exist. Please make sure to specify "
            f"an existing directory and try again."
        )

    training_data = Path(args.data)
    if not os.path.exists(training_data):
        print_error_and_exit(
            f"The training data path {training_data} doesn't exist "
            f"and will be skipped."
        )

    if training_data.is_file():
        # A single file used to crash in os.listdir(); convert it directly.
        source_paths = [training_data]
    else:
        # Sorted so the conversion order does not depend on the OS listing.
        source_paths = [
            training_data / file for file in sorted(os.listdir(training_data))
        ]

    num_of_files_converted = 0
    for source_path in source_paths:
        output_path = output / f"{source_path.stem}{CONVERTED_FILE_SUFFIX}"

        if MarkdownReader.is_markdown_nlu_file(source_path):
            if not is_nlu:
                continue
            _write_nlu_yaml(source_path, output_path, source_path)
            num_of_files_converted += 1
        elif not is_nlu and MarkdownStoryReader.is_markdown_story_file(source_path):
            _write_core_yaml(source_path, output_path, source_path)
            num_of_files_converted += 1
        else:
            print_warning(f"Skipped file: '{source_path}'.")

    print_info(f"Converted {num_of_files_converted} file(s), saved in '{output}'.")
Example #12
0
def _reader_factory(fformat: Text) -> Optional["TrainingDataReader"]:
    """Return a new reader for ``fformat``, or ``None`` if it is not known."""
    from rasa.nlu.training_data.formats import (
        DialogflowReader,
        LuisReader,
        MarkdownReader,
        NLGMarkdownReader,
        RasaReader,
        WitReader,
    )

    # Checked in a fixed order; the first matching format wins.
    if fformat == LUIS:
        return LuisReader()
    if fformat == WIT:
        return WitReader()
    if fformat in DIALOGFLOW_RELEVANT:
        return DialogflowReader()
    if fformat == RASA:
        return RasaReader()
    if fformat == MARKDOWN:
        return MarkdownReader()
    if fformat == MARKDOWN_NLG:
        return NLGMarkdownReader()
    return None
Example #13
0
def test_markdown_entity_regex():
    """Verify entity spans, values and names parsed from Markdown examples."""

    def with_entity(start, end, value, entity):
        # Expected ``data`` of an example carrying exactly one entity.
        return {
            "intent": "restaurant_search",
            "entities": [
                {"start": start, "end": end, "value": value, "entity": entity}
            ],
        }

    md = """
## intent:restaurant_search
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town
- show me [chines](cuisine:chinese) restaurants
- show me [chines](22_ab-34*3.A:43er*+?df) restaurants
    """

    reader = MarkdownReader()
    result = reader.reads(md)
    assert len(result.training_examples) == 4

    plain = result.training_examples[0]
    assert plain.data == {"intent": "restaurant_search"}
    assert plain.text == "i'm looking for a place to eat"

    direction = result.training_examples[1]
    assert direction.data == with_entity(31, 36, "north", "loc-direction")
    assert direction.text == "i'm looking for a place in the north of town"

    synonym = result.training_examples[2]
    assert synonym.data == with_entity(8, 14, "chinese", "cuisine")
    assert synonym.text == "show me chines restaurants"

    special_chars = result.training_examples[3]
    assert special_chars.data == with_entity(
        8, 14, "43er*+?df", "22_ab-34*3.A")
    assert special_chars.text == "show me chines restaurants"
Example #14
0
def test_markdown_entity_regex(
    example: Text,
    expected_entities: Optional[List[Dict[Text, Any]]],
    expected_text: Text,
):
    """Parse one example line and compare its entities and plain text.

    ``expected_entities`` is compared against ``data.get("entities")``, so
    it is ``None`` for an example without entities.
    """
    md = f"""
## intent:test-intent
- {example}
    """

    parsed_examples = MarkdownReader().reads(md).training_examples
    assert len(parsed_examples) == 1

    parsed = parsed_examples[0]
    assert parsed.data["intent"] == "test-intent"
    assert parsed.data.get("entities") == expected_entities
    assert parsed.text == expected_text
Example #15
0
def md_format_message(text, intent, entities):
    """Render a message as a Markdown training example.

    ``text`` is parsed as a Markdown example first and only the resulting
    text is kept; ``entities`` go through ``deserialise_entities`` before
    the message is generated.
    """
    from rasa.nlu.training_data.formats import MarkdownWriter, MarkdownReader

    parsed = MarkdownReader()._parse_training_example(text)
    message = {
        "text": parsed.text,
        "intent": intent,
        "entities": deserialise_entities(entities),
    }
    return MarkdownWriter()._generate_message_md(message)
Example #16
0
def md_format_message(text, intent, entities) -> Text:
    """Render a message through ``TrainingDataWriter.generate_message``.

    ``text`` is parsed as a Markdown example first and only the resulting
    text is kept; ``entities`` go through ``deserialise_entities`` before
    the message is generated.
    """
    from rasa.nlu.training_data.formats import MarkdownReader
    from rasa.nlu.training_data.formats.readerwriter import TrainingDataWriter

    parsed = MarkdownReader().parse_training_example(text)
    message = {
        "text": parsed.text,
        "intent": intent,
        "entities": deserialise_entities(entities),
    }
    return TrainingDataWriter.generate_message(message)
Example #17
0
 async def replace_placeholders(self, example: Message, faker_: Faker,
                                matches: List[Tuple[Any, ...]],
                                count: int) -> AsyncIterator[Message]:
     """Yield ``count`` messages built from ``example`` with placeholders replaced.

     Each pass replaces placeholders in ``example.text`` for the message
     text and, separately, in the rebuilt original text, from which the
     entities are extracted.
     """
     annotated_text = await self.rebuild_original_text(example)
     for _ in range(count):
         filled_text = await self.replace_placeholders_in_text(
             example.text, faker_, matches)
         # NOTE(review): annotated_text is overwritten on every pass, so from
         # the second pass on the replacement runs on text that was already
         # substituted - confirm this is intended.
         annotated_text = await self.replace_placeholders_in_text(
             annotated_text, faker_, matches)
         found_entities = MarkdownReader._find_entities_in_training_example(
             annotated_text)
         yield Message.build(filled_text, example.get("intent"),
                             found_entities)
Example #18
0
async def _correct_entities(latest_message: Dict[Text, Any],
                            endpoint: EndpointConfig,
                            sender_id: Text) -> List[Dict[Text, Any]]:
    """Validate the entities of a user message.

    The user is shown the message's parse data rendered as Markdown and
    asked to mark the entities; the answer is parsed and merged with the
    original parse data.

    Returns the corrected entities"""
    from rasa.nlu.training_data.formats import MarkdownReader

    original_parse = latest_message.get("parse_data", {})
    prompt = questionary.text(
        "Please mark the entities using [value](type) notation",
        default=_as_md_message(original_parse))

    user_annotation = await _ask_questions(prompt, sender_id, endpoint)
    # noinspection PyProtectedMember
    annotated_parse = MarkdownReader()._parse_training_example(
        user_annotation)

    return _merge_annotated_and_original_entities(
        annotated_parse, original_parse)
Example #19
0
from rasa.nlu.training_data.formats import MarkdownReader
import xlsxwriter

# Export the text and intent of every example in a Markdown NLU file to an
# .xlsx sheet with the columns "question", "label" and an empty "answer".

doc = "PATH\\TO\\nlu.md"

# Read the training data before touching the workbook so that a failed read
# leaves nothing half-written behind.
reader = MarkdownReader()
reader.read(doc, language='de', fformat='MARKDOWN')

# Collected in one pass; growing a tuple by concatenation inside the loop
# copied every earlier element again on each step.
training_data = [
    (message.text, message.get('intent'))
    for message in reader.training_examples
]

col = 0

# The context manager closes (and thereby writes) the workbook even if one
# of the write calls below raises.
with xlsxwriter.Workbook('filename.xlsx') as workbook:
    worksheet = workbook.add_worksheet()
    worksheet.write('A1', 'question')
    worksheet.write('B1', 'label')
    worksheet.write('C1', 'answer')

    # Row 0 holds the header, so the data starts at row 1.
    for row, (question, label) in enumerate(training_data, start=1):
        worksheet.write_string(row, col, question)
        worksheet.write_string(row, col + 1, label)
        worksheet.write_string(row, col + 2, '')