Esempio n. 1
0
def test_check_correct_entity_annotations(
        text: Text, warnings: int, whitespace_tokenizer: WhitespaceTokenizer):
    """Verify that misaligned entity annotations raise the expected warnings.

    Args:
        text: Training data source that is parsed with ``RasaYAMLReader``.
        warnings: Number of warnings the annotation check is expected to emit.
        whitespace_tokenizer: Tokenizer used to process the parsed training
            data before the annotation check runs.
    """
    reader = RasaYAMLReader()

    training_data = reader.reads(text)
    whitespace_tokenizer.process_training_data(training_data)

    with pytest.warns(UserWarning) as record:
        EntityExtractorMixin.check_correct_entity_annotations(training_data)

    assert len(record) == warnings

    # Each excerpt must appear in the first recorded warning message.
    # NOTE: the membership test has to be the generator's element itself;
    # wrapping it in a list (``[excerpt in msg]``) always yields a truthy
    # one-element list and silently turns the assertion into a no-op.
    first_message = record[0].message.args[0]
    expected_excerpts = ["Misaligned entity annotation in sentence"]
    assert all(excerpt in first_message for excerpt in expected_excerpts)
Esempio n. 2
0
def test_convert_tags_to_entities(
    text: Text,
    tags: Dict[Text, List[Text]],
    confidences: Dict[Text, List[float]],
    expected_entities: List[Dict[Text, Any]],
    whitespace_tokenizer: WhitespaceTokenizer,
):
    """Check that predicted tags are converted into the expected entities.

    The text is wrapped in a ``Message`` and tokenized with the supplied
    whitespace tokenizer; the tokens, tags and confidences are then passed to
    ``convert_predictions_into_entities`` with ``SPLIT_ENTITIES_BY_COMMA`` set
    to ``True``, and the result is compared against ``expected_entities``.
    """
    mixin = EntityExtractorMixin()

    token_list = whitespace_tokenizer.tokenize(Message(data={TEXT: text}), TEXT)

    actual_entities = mixin.convert_predictions_into_entities(
        text,
        token_list,
        tags,
        {SPLIT_ENTITIES_BY_COMMA: True},
        confidences,
    )

    assert actual_entities == expected_entities
Esempio n. 3
0
def test_split_entities_by_comma(
    text: Text,
    tags: Dict[Text, List[Text]],
    confidences: Dict[Text, List[float]],
    expected_entities: List[Dict[Text, Any]],
):
    """Check entity conversion with per-entity comma-splitting settings.

    The split configuration sets ``SPLIT_ENTITIES_BY_COMMA`` to ``True`` and
    overrides it with ``False`` for ``"address"`` and ``True`` for
    ``"ingredient"``. The text is tokenized with a fresh
    ``WhitespaceTokenizer`` and the converted entities are compared against
    ``expected_entities``.
    """
    entity_extractor = EntityExtractor()
    ws_tokenizer = WhitespaceTokenizer()

    token_list = ws_tokenizer.tokenize(Message(data={TEXT: text}), TEXT)

    # Key order is kept as-is: the global default first, then the overrides.
    split_config = {
        SPLIT_ENTITIES_BY_COMMA: True,
        "address": False,
        "ingredient": True,
    }

    actual_entities = entity_extractor.convert_predictions_into_entities(
        text,
        token_list,
        tags,
        split_config,
        confidences,
    )

    assert actual_entities == expected_entities