Code Example #1
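Note: the snippets below are test functions from Rasa's NLU test suite, shown without their imports or @pytest.mark.parametrize decorators. A minimal sketch of the imports they collectively assume (module paths follow the Rasa 1.x layout; that layout is an assumption, since later Rasa versions moved several of these modules):

# Assumed imports (Rasa 1.x module layout); not part of the original snippets
from typing import Any, Dict, List, Text

import numpy as np
import pytest

import rasa.nlu.utils.pattern_utils as pattern_utils
from rasa.nlu.constants import ENTITIES, TEXT, TOKENS_NAMES
from rasa.nlu.extractors.regex_entity_extractor import RegexEntityExtractor
from rasa.nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
from rasa.nlu.training_data import Message, TrainingData
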
def test_process(
    text: Text,
    lookup: List[Dict[Text, List[Text]]],
    expected_entities: List[Dict[Text, Any]],
):
    message = Message(text)

    training_data = TrainingData()
    training_data.lookup_tables = lookup
    training_data.training_examples = [
        Message(
            "Hi Max!",
            data={"entities": [{"entity": "person", "value": "Max"}]},
        ),
        Message(
            "I live in Berlin",
            data={"entities": [{"entity": "city", "value": "Berlin"}]},
        ),
    ]

    entity_extractor = RegexEntityExtractor()
    entity_extractor.train(training_data)
    entity_extractor.process(message)

    entities = message.get(ENTITIES)
    assert entities == expected_entities
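
The text, lookup, and expected_entities arguments come from a @pytest.mark.parametrize decorator that the snippet omits. A minimal sketch of one plausible parameter set (the concrete values are illustrative assumptions, modeled on the lookup table and expected-entity shape in Code Example #3):

@pytest.mark.parametrize(
    "text, lookup, expected_entities",
    [
        (
            "I live in Berlin",
            # one lookup table; its name matches the "city" entity that is
            # annotated in the training examples inside the test body
            [{"name": "city", "elements": ["London", "Berlin", "Amsterdam"]}],
            [
                {
                    "entity": "city",
                    "value": "Berlin",
                    "start": 10,
                    "end": 16,
                    "extractor": "RegexEntityExtractor",
                }
            ],
        ),
    ],
)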
Code Example #2
def test_extract_patterns(
    lookup_tables: Dict[Text, List[Text]],
    regex_features: Dict[Text, Text],
    expected_patterns: Dict[Text, Text],
):
    training_data = TrainingData()
    if lookup_tables:
        training_data.lookup_tables = [lookup_tables]
    if regex_features:
        training_data.regex_features = [regex_features]

    actual_patterns = pattern_utils.extract_patterns(training_data)

    assert actual_patterns == expected_patterns
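
Here too the inputs arrive via parametrize. A sketch of two plausible cases (assumed behavior: extract_patterns returns regex features unchanged as name/pattern dicts, and an empty list when there is nothing to extract):

@pytest.mark.parametrize(
    "lookup_tables, regex_features, expected_patterns",
    [
        # nothing provided -> nothing extracted
        ({}, {}, []),
        # a regex feature is assumed to pass through as a name/pattern dict
        (
            {},
            {"name": "zipcode", "pattern": "[0-9]{5}"},
            [{"name": "zipcode", "pattern": "[0-9]{5}"}],
        ),
    ],
)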
Code Example #3
def test_do_not_overwrite_any_entities():
    message = Message("Max lives in Berlin.")
    message.set(
        ENTITIES,
        [{"entity": "person", "value": "Max", "start": 0, "end": 3}],
    )

    training_data = TrainingData()
    training_data.training_examples = [
        Message(
            "Hi Max!",
            data={"entities": [{"entity": "person", "value": "Max"}]},
        ),
        Message(
            "I live in Berlin",
            data={"entities": [{"entity": "city", "value": "Berlin"}]},
        ),
    ]
    training_data.lookup_tables = [
        {"name": "city", "elements": ["London", "Berlin", "Amsterdam"]}
    ]

    entity_extractor = RegexEntityExtractor()
    entity_extractor.train(training_data)
    entity_extractor.process(message)

    entities = message.get(ENTITIES)
    assert entities == [
        {"entity": "person", "value": "Max", "start": 0, "end": 3},
        {
            "entity": "city",
            "value": "Berlin",
            "start": 13,
            "end": 19,
            "extractor": "RegexEntityExtractor",
        },
    ]
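
Note the asymmetry in the expected result: the pre-set "person" entity survives unchanged and carries no "extractor" field, while the newly found "city" entity is stamped with the extractor's name. The extractor only appends entities it discovered itself; it does not overwrite existing ones.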
Code Example #4
def test_extract_patterns_use_only_entities_lookup_tables(
    entity: Text,
    lookup_tables: Dict[Text, List[Text]],
    expected_patterns: Dict[Text, Text],
):
    training_data = TrainingData()
    if entity:
        training_data.training_examples = [
            Message(
                "text",
                data={"entities": [{"entity": entity, "value": "text"}]},
            )
        ]
    if lookup_tables:
        training_data.lookup_tables = [lookup_tables]

    actual_patterns = pattern_utils.extract_patterns(
        training_data, use_only_entities=True
    )

    assert actual_patterns == expected_patterns
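
A sketch of parameter sets that exercise the use_only_entities flag (values are illustrative assumptions): with use_only_entities=True, a lookup table should only yield a pattern when its name matches an entity annotated in the training examples, so both cases below expect an empty result.

@pytest.mark.parametrize(
    "entity, lookup_tables, expected_patterns",
    [
        # no entities and no lookup tables -> nothing to extract
        (None, {}, []),
        # the table's name matches no annotated entity,
        # so use_only_entities=True filters it out
        (None, {"name": "person", "elements": ["Max", "John"]}, []),
    ],
)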
Code Example #5
def test_extract_patterns_use_only_lookup_tables_or_regex_features(
    lookup_tables: Dict[Text, List[Text]],
    regex_features: Dict[Text, Text],
    use_lookup_tables: bool,
    use_regex_features: bool,
    expected_patterns: Dict[Text, Text],
):
    training_data = TrainingData()
    if lookup_tables:
        training_data.lookup_tables = [lookup_tables]
    if regex_features:
        training_data.regex_features = [regex_features]

    actual_patterns = pattern_utils.extract_patterns(
        training_data,
        use_lookup_tables=use_lookup_tables,
        use_regexes=use_regex_features,
    )

    assert actual_patterns == expected_patterns
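
A sketch of two cases for the toggles (same pass-through assumption as in Code Example #2): a provided regex feature should be dropped when use_regexes is off and returned as-is when it is on.

@pytest.mark.parametrize(
    "lookup_tables, regex_features, use_lookup_tables, use_regex_features,"
    " expected_patterns",
    [
        # regex provided, but regex extraction switched off
        ({}, {"name": "zipcode", "pattern": "[0-9]{5}"}, True, False, []),
        # regex provided and switched on -> returned as-is
        (
            {},
            {"name": "zipcode", "pattern": "[0-9]{5}"},
            False,
            True,
            [{"name": "zipcode", "pattern": "[0-9]{5}"}],
        ),
    ],
)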
Code Example #6
def test_lookup_tables(sentence, expected, labeled_tokens, spacy_nlp):
    from rasa.nlu.featurizers.sparse_featurizer.regex_featurizer import RegexFeaturizer

    lookups = [
        {
            "name": "drinks",
            "elements": [
                "mojito", "lemonade", "sweet berry wine", "tea", "club?mate"
            ],
        },
        {"name": "plates", "elements": "data/test/lookup_tables/plates.txt"},
    ]
    ftr = RegexFeaturizer()
    training_data = TrainingData()
    training_data.lookup_tables = lookups
    ftr.train(training_data)

    # adds tokens to the message
    component_config = {"name": "SpacyTokenizer"}
    tokenizer = SpacyTokenizer(component_config)
    message = Message(sentence)
    message.set("text_spacy_doc", spacy_nlp(sentence))
    tokenizer.process(message)

    sequence_features, sentence_features = ftr._features_for_patterns(
        message, TEXT
    )
    assert np.allclose(sequence_features.toarray(), expected[:-1], atol=1e-10)
    assert np.allclose(sentence_features.toarray(), expected[-1], atol=1e-10)

    # the tokenizer should have added tokens
    assert len(message.get(TOKENS_NAMES[TEXT], [])) > 0
    # the number of regex matches on each token should match
    for i, token in enumerate(message.get(TOKENS_NAMES[TEXT])):
        token_matches = token.get("pattern").values()
        num_matches = sum(token_matches)
        assert num_matches == labeled_tokens.count(i)
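
Two details worth noting about the assertions: expected appears to stack one feature row per token plus a final sentence-level row, which is why the test compares expected[:-1] against the sequence features and expected[-1] against the sentence features. Each token also ends up with a "pattern" dict holding one truth value per trained pattern, so summing its values counts how many patterns matched that token, and labeled_tokens is expected to list each matching token index once per match.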