Beispiel #1
0
def test_markdown_entity_regex():
    """Check entity extraction from ``[text](entity)`` markdown annotations.

    Covers a plain example without entities, an entity without an explicit
    value, an ``entity:value`` annotation, and an annotation whose entity
    name and value contain digits and punctuation.
    """
    md = """
## intent:restaurant_search
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town
- show me [chines](cuisine:chinese) restaurants
- show me [chines](22_ab-34*3.A:43er*+?df) restaurants
    """

    def expected(start, end, value, entity):
        # Data payload of an example carrying exactly one entity.
        return {
            'intent': 'restaurant_search',
            'entities': [{
                'start': start,
                'end': end,
                'value': value,
                'entity': entity,
            }],
        }

    examples = MarkdownReader().reads(md).training_examples
    assert len(examples) == 4

    # No annotation at all: only the intent is recorded.
    no_entity = examples[0]
    assert no_entity.data == {"intent": "restaurant_search"}
    assert no_entity.text == "i'm looking for a place to eat"

    # Annotation without an explicit value: the marked text is the value.
    implicit_value = examples[1]
    assert implicit_value.data == expected(31, 36, 'north', 'loc-direction')
    assert implicit_value.text == "i'm looking for a place in the north of town"

    # ``entity:value`` annotation: value differs from the marked text.
    explicit_value = examples[2]
    assert explicit_value.data == expected(8, 14, 'chinese', 'cuisine')
    assert explicit_value.text == "show me chines restaurants"

    # Entity name and value built from unusual characters.
    odd_characters = examples[3]
    assert odd_characters.data == expected(8, 14, '43er*+?df', '22_ab-34*3.A')
    assert odd_characters.text == "show me chines restaurants"
Beispiel #2
0
def _correct_entities(latest_message: Dict[Text, Any],
                      endpoint: EndpointConfig,
                      sender_id: Text) -> Dict[Text, Any]:
    """Ask the user to re-annotate the entities of a user message.

    The message's ``parse_data`` is passed through ``_as_md_message`` and
    offered as the default answer of a free-text prompt. The reply is parsed
    as a markdown training example and its ``entities`` are returned.

    NOTE(review): the fallback value is an empty list, so the declared
    ``Dict`` return type looks inaccurate - confirm against the callers.
    """
    prompt = questionary.text(
        "Please mark the entities using [value](type) notation",
        default=_as_md_message(latest_message.get("parse_data", {})))
    user_markup = _ask_or_abort(prompt, sender_id, endpoint)

    # noinspection PyProtectedMember
    example = MarkdownReader()._parse_training_example(user_markup)
    return example.get("entities", [])
Beispiel #3
0
def md_format_message(text, intent, entities):
    """Build the markdown representation of a message.

    ``text`` is parsed as a markdown training example and only the parsed
    example's ``text`` attribute is kept. It is combined with ``intent`` and
    the deserialised ``entities`` and handed to the markdown writer.
    """
    parsed_example = MarkdownReader()._parse_training_example(text)
    entity_list = deserialise_entities(entities)
    writer = MarkdownWriter()

    message = {
        "text": parsed_example.text,
        "intent": intent,
        "entities": entity_list,
    }
    return writer._generate_message_md(message)
def test_markdown_entity_regex():
    """Verify that the markdown reader parses entity annotations.

    The four examples exercise: no entity, ``[text](entity)``,
    ``[text](entity:value)`` and an annotation made of unusual characters.
    """
    reader = MarkdownReader()

    md = """
## intent:restaurant_search
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town
- show me [chines](cuisine:chinese) restaurants
- show me [chines](22_ab-34*3.A:43er*+?df) restaurants
    """

    parsed = reader.reads(md)
    assert len(parsed.training_examples) == 4
    plain, direction, cuisine, exotic = parsed.training_examples

    assert plain.data == {"intent": "restaurant_search"}
    assert plain.text == "i'm looking for a place to eat"

    direction_entity = {
        'start': 31,
        'end': 36,
        'value': 'north',
        'entity': 'loc-direction',
    }
    assert direction.data == {
        'intent': 'restaurant_search',
        'entities': [direction_entity],
    }
    assert direction.text == "i'm looking for a place in the north of town"

    cuisine_entity = {
        'start': 8,
        'end': 14,
        'value': 'chinese',
        'entity': 'cuisine',
    }
    assert cuisine.data == {
        'intent': 'restaurant_search',
        'entities': [cuisine_entity],
    }
    assert cuisine.text == "show me chines restaurants"

    exotic_entity = {
        'start': 8,
        'end': 14,
        'value': '43er*+?df',
        'entity': '22_ab-34*3.A',
    }
    assert exotic.data == {
        'intent': 'restaurant_search',
        'entities': [exotic_entity],
    }
    assert exotic.text == "show me chines restaurants"
Beispiel #5
0
def _correct_entities(latest_message, endpoint, sender_id):
    # type: (Dict[Text, Any], EndpointConfig, Text) -> Dict[Text, Any]
    """Let the user correct the entities of a user message.

    A single ``input`` question named ``annotation`` is asked, pre-filled
    with the output of ``_as_md_message`` for the message's ``parse_data``.
    The answer is parsed as a markdown training example and its
    ``entities`` are returned (an empty list when there are none).
    """
    annotation_question = {
        "type": "input",
        "name": "annotation",
        "default": _as_md_message(latest_message.get("parse_data", {})),
        "message": "Please mark the entities using [value](type) notation",
    }
    replies = _ask_questions([annotation_question], sender_id, endpoint)

    # noinspection PyProtectedMember
    example = MarkdownReader()._parse_training_example(replies["annotation"])
    return example.get("entities", [])
Beispiel #6
0
def _reader_factory(fformat):
    """Return a new reader instance for the given file format.

    Returns ``None`` when ``fformat`` matches none of the known formats.
    """
    if fformat == LUIS:
        return LuisReader()
    if fformat == WIT:
        return WitReader()
    if fformat in DIALOGFLOW_RELEVANT:
        return DialogflowReader()
    if fformat == RASA:
        return RasaReader()
    if fformat == MARKDOWN:
        return MarkdownReader()
    # Unknown format: callers receive no reader.
    return None
Beispiel #7
0
def _reader_factory(fformat):
    """Return a new reader instance for the given file format.

    The reader classes are imported inside the function. ``None`` is
    returned when ``fformat`` matches none of the known formats.
    """
    from rasa_nlu.training_data.formats import (
        MarkdownReader, WitReader, LuisReader,
        RasaReader, DialogflowReader)

    if fformat == LUIS:
        return LuisReader()
    if fformat == WIT:
        return WitReader()
    if fformat in DIALOGFLOW_RELEVANT:
        return DialogflowReader()
    if fformat == RASA:
        return RasaReader()
    if fformat == MARKDOWN:
        return MarkdownReader()
    # Unknown format: callers receive no reader.
    return None
def _reader_factory(fformat):
    """Return a new reader instance for the given file format.

    The format identifiers are defined locally; the Dialogflow identifiers
    come from the enclosing module. ``None`` is returned when ``fformat``
    matches none of the known formats.
    """
    WIT = "wit"
    LUIS = "luis"
    RASA = "rasa_nlu"
    MARKDOWN = "md"
    DIALOGFLOW_RELEVANT = {DIALOGFLOW_ENTITIES, DIALOGFLOW_INTENT}

    if fformat == LUIS:
        return LuisReader()
    if fformat == WIT:
        return WitReader()
    if fformat in DIALOGFLOW_RELEVANT:
        return DialogflowReader()
    if fformat == RASA:
        return RasaReader()
    if fformat == MARKDOWN:
        return MarkdownReader()
    # Unknown format: callers receive no reader.
    return None
Beispiel #9
0
def _load(filename, language='en'):
    """Load a single training data file from disk.

    The format is determined by ``_guess_format`` and logged, then the file
    is read by the matching reader. ``language`` is only forwarded to the
    Dialogflow loader.

    Raises:
        ValueError: if the guessed format is not one of the handled ones.
    """
    fformat = _guess_format(filename)
    logger.info("Training data format of {} is {}".format(filename, fformat))

    if fformat == LUIS:
        return LuisReader().read(filename)
    if fformat == WIT:
        return WitReader().read(filename)
    if fformat.startswith("dialogflow"):
        # Dialogflow files need the language and the exact sub-format.
        return _from_dialogflow_file(filename, language, fformat)
    if fformat == RASA:
        return RasaReader().read(filename)
    if fformat == MARKDOWN:
        return MarkdownReader().read(filename)

    raise ValueError("unknown training file format : {} for "
                     "file {}".format(fformat, filename))
Beispiel #10
0
async def _correct_entities(latest_message: Dict[Text, Any],
                            endpoint: EndpointConfig,
                            sender_id: Text) -> List[Dict[Text, Any]]:
    """Ask the user to re-annotate the entities of a user message.

    The message's ``parse_data`` is passed through ``_as_md_message`` and
    offered as the default answer of a free-text prompt. The reply is parsed
    as a markdown training example and merged with the original parse data
    by ``_merge_annotated_and_original_entities``; that result is returned.
    """
    from rasa_nlu.training_data.formats import MarkdownReader

    original_parse = latest_message.get("parse_data", {})
    prompt = questionary.text(
        "Please mark the entities using [value](type) notation",
        default=_as_md_message(original_parse))
    user_markup = await _ask_questions(prompt, sender_id, endpoint)

    # noinspection PyProtectedMember
    annotated_parse = MarkdownReader()._parse_training_example(user_markup)

    return _merge_annotated_and_original_entities(
        annotated_parse, original_parse)