def test_train_tokenizer(text, expected_tokens, expected_indices, spacy_nlp):
    tk = SpacyTokenizer()

    message = Message(text)
    message.set(SPACY_DOCS[TEXT], spacy_nlp(text))
    message.set(RESPONSE, text)
    message.set(SPACY_DOCS[RESPONSE], spacy_nlp(text))

    training_data = TrainingData()
    training_data.training_examples = [message]

    tk.train(training_data)

    for attribute in [RESPONSE, TEXT]:
        tokens = training_data.training_examples[0].get(TOKENS_NAMES[attribute])

        assert [t.text for t in tokens] == expected_tokens
        assert [t.start for t in tokens] == [i[0] for i in expected_indices]
        assert [t.end for t in tokens] == [i[1] for i in expected_indices]
def test_extract_patterns_use_only_entities_lookup_tables(
        entity: Text, lookup_tables: Dict[Text, Text],
        expected_patterns: Dict[Text, Text]):
    training_data = TrainingData()
    if entity:
        training_data.training_examples = [
            Message("text",
                    data={"entities": [{
                        "entity": entity,
                        "value": "text"
                    }]})
        ]
    if lookup_tables:
        training_data.lookup_tables = [lookup_tables]

    actual_patterns = pattern_utils.extract_patterns(training_data,
                                                     use_only_entities=True)

    assert actual_patterns == expected_patterns
Beispiel #3
0
def test_train_tokenizer(text, expected_tokens, expected_indices):
    tk = WhitespaceTokenizer()

    message = Message(text)
    message.set(RESPONSE, text)
    message.set(INTENT, text)

    training_data = TrainingData()
    training_data.training_examples = [message]

    tk.train(training_data)

    for attribute in [RESPONSE, TEXT]:
        tokens = training_data.training_examples[0].get(
            TOKENS_NAMES[attribute])

        assert [t.text for t in tokens] == expected_tokens
        assert [t.start for t in tokens] == [i[0] for i in expected_indices]
        assert [t.end for t in tokens] == [i[1] for i in expected_indices]

    # check intent attribute
    tokens = training_data.training_examples[0].get(TOKENS_NAMES[INTENT])

    assert [t.text for t in tokens] == [text]