Exemple #1
0
def test_crf_json_from_BILOU(spacy_nlp, ner_crf_pos_feature_config):
    from rasa.nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    ext = CRFEntityExtractor(component_config=ner_crf_pos_feature_config)
    sentence = "I need a home cleaning close-by"
    doc = {"spacy_doc": spacy_nlp(sentence)}
    r = ext._from_crf_to_json(
        Message(sentence, doc),
        [
            {"O": 1.0},
            {"O": 1.0},
            {"O": 1.0},
            {"B-what": 1.0},
            {"L-what": 1.0},
            {"B-where": 1.0},
            {"I-where": 1.0},
            {"L-where": 1.0},
        ],
    )
    assert len(r) == 2, "There should be two entities"

    assert r[0]["confidence"]  # confidence should exist
    del r[0]["confidence"]
    assert r[0] == {"start": 9, "end": 22, "value": "home cleaning", "entity": "what"}

    assert r[1]["confidence"]  # confidence should exist
    del r[1]["confidence"]
    assert r[1] == {"start": 23, "end": 31, "value": "close-by", "entity": "where"}
Exemple #2
0
def test_crf_json_from_non_BILOU(spacy_nlp, ner_crf_pos_feature_config):
    from rasa.nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    ner_crf_pos_feature_config.update({"BILOU_flag": False})
    ext = CRFEntityExtractor(component_config=ner_crf_pos_feature_config)
    sentence = "I need a home cleaning close-by"
    doc = {"spacy_doc": spacy_nlp(sentence)}
    rs = ext._from_crf_to_json(
        Message(sentence, doc),
        [
            {"O": 1.0},
            {"O": 1.0},
            {"O": 1.0},
            {"what": 1.0},
            {"what": 1.0},
            {"where": 1.0},
            {"where": 1.0},
            {"where": 1.0},
        ],
    )

    # non BILOU will split multi-word entities - hence 5
    assert len(rs) == 5, "There should be five entities"

    for r in rs:
        assert r["confidence"]  # confidence should exist
        del r["confidence"]

    assert rs[0] == {"start": 9, "end": 13, "value": "home", "entity": "what"}
    assert rs[1] == {"start": 14, "end": 22, "value": "cleaning", "entity": "what"}
    assert rs[2] == {"start": 23, "end": 28, "value": "close", "entity": "where"}
    assert rs[3] == {"start": 28, "end": 29, "value": "-", "entity": "where"}
    assert rs[4] == {"start": 29, "end": 31, "value": "by", "entity": "where"}
Exemple #3
0
def test_crf_json_from_BILOU(spacy_nlp):
    ext = CRFEntityExtractor(
        component_config={
            "features": [
                ["low", "title", "upper", "pos", "pos2"],
                [
                    "low",
                    "bias",
                    "suffix3",
                    "suffix2",
                    "upper",
                    "title",
                    "digit",
                    "pos",
                    "pos2",
                ],
                ["low", "title", "upper", "pos", "pos2"],
            ]
        }
    )

    sentence = "I need a home cleaning close-by"

    message = Message(sentence, {SPACY_DOCS[TEXT]: spacy_nlp(sentence)})

    tokenizer = SpacyTokenizer()
    tokenizer.process(message)

    r = ext._from_crf_to_json(
        message,
        [
            {"O": 1.0},
            {"O": 1.0},
            {"O": 1.0},
            {"B-what": 1.0},
            {"L-what": 1.0},
            {"B-where": 1.0},
            {"I-where": 1.0},
            {"L-where": 1.0},
        ],
    )
    assert len(r) == 2, "There should be two entities"

    assert r[0]["confidence"]  # confidence should exist
    del r[0]["confidence"]
    assert r[0] == {"start": 9, "end": 22, "value": "home cleaning", "entity": "what"}

    assert r[1]["confidence"]  # confidence should exist
    del r[1]["confidence"]
    assert r[1] == {"start": 23, "end": 31, "value": "close-by", "entity": "where"}
Exemple #4
0
def test_crf_json_from_non_BILOU(spacy_nlp):
    from rasa.nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    ext = CRFEntityExtractor(
        component_config={
            "BILOU_flag": False,
            "features": [
                ["low", "title", "upper", "pos", "pos2"],
                ["low", "suffix3", "suffix2", "upper", "title", "digit", "pos", "pos2"],
                ["low", "title", "upper", "pos", "pos2"],
            ],
        }
    )
    sentence = "I need a home cleaning close-by"

    message = Message(sentence, {SPACY_DOCS[TEXT]: spacy_nlp(sentence)})

    tokenizer = SpacyTokenizer()
    tokenizer.process(message)

    rs = ext._from_crf_to_json(
        message,
        [
            {"O": 1.0},
            {"O": 1.0},
            {"O": 1.0},
            {"what": 1.0},
            {"what": 1.0},
            {"where": 1.0},
            {"where": 1.0},
            {"where": 1.0},
        ],
    )

    # non BILOU will split multi-word entities - hence 5
    assert len(rs) == 5, "There should be five entities"

    for r in rs:
        assert r["confidence"]  # confidence should exist
        del r["confidence"]

    assert rs[0] == {"start": 9, "end": 13, "value": "home", "entity": "what"}
    assert rs[1] == {"start": 14, "end": 22, "value": "cleaning", "entity": "what"}
    assert rs[2] == {"start": 23, "end": 28, "value": "close", "entity": "where"}
    assert rs[3] == {"start": 28, "end": 29, "value": "-", "entity": "where"}
    assert rs[4] == {"start": 29, "end": 31, "value": "by", "entity": "where"}
Exemple #5
0
def test_crf_json_from_non_BILOU(spacy_nlp, ner_crf_pos_feature_config):
    from rasa.nlu.extractors.crf_entity_extractor import CRFEntityExtractor
    ner_crf_pos_feature_config.update({"BILOU_flag": False})
    ext = CRFEntityExtractor(component_config=ner_crf_pos_feature_config)
    sentence = u"I need a home cleaning close-by"
    doc = {"spacy_doc": spacy_nlp(sentence)}
    rs = ext._from_crf_to_json(Message(sentence, doc), [{
        'O': 1.0
    }, {
        'O': 1.0
    }, {
        'O': 1.0
    }, {
        'what': 1.0
    }, {
        'what': 1.0
    }, {
        'where': 1.0
    }, {
        'where': 1.0
    }, {
        'where': 1.0
    }])

    # non BILOU will split multi-word entities - hence 5
    assert len(rs) == 5, "There should be five entities"

    for r in rs:
        assert r['confidence']  # confidence should exist
        del r['confidence']

    assert rs[0] == {'start': 9, 'end': 13, 'value': 'home', 'entity': 'what'}
    assert rs[1] == {
        'start': 14,
        'end': 22,
        'value': 'cleaning',
        'entity': 'what'
    }
    assert rs[2] == {
        'start': 23,
        'end': 28,
        'value': 'close',
        'entity': 'where'
    }
    assert rs[3] == {'start': 28, 'end': 29, 'value': '-', 'entity': 'where'}
    assert rs[4] == {'start': 29, 'end': 31, 'value': 'by', 'entity': 'where'}
Exemple #6
0
def test_crf_json_from_BILOU(spacy_nlp, ner_crf_pos_feature_config):
    from rasa.nlu.extractors.crf_entity_extractor import CRFEntityExtractor
    ext = CRFEntityExtractor(component_config=ner_crf_pos_feature_config)
    sentence = u"I need a home cleaning close-by"
    doc = {"spacy_doc": spacy_nlp(sentence)}
    r = ext._from_crf_to_json(Message(sentence, doc), [{
        'O': 1.0
    }, {
        'O': 1.0
    }, {
        'O': 1.0
    }, {
        'B-what': 1.0
    }, {
        'L-what': 1.0
    }, {
        'B-where': 1.0
    }, {
        'I-where': 1.0
    }, {
        'L-where': 1.0
    }])
    assert len(r) == 2, "There should be two entities"

    assert r[0]["confidence"]  # confidence should exist
    del r[0]["confidence"]
    assert r[0] == {
        'start': 9,
        'end': 22,
        'value': 'home cleaning',
        'entity': 'what'
    }

    assert r[1]["confidence"]  # confidence should exist
    del r[1]["confidence"]
    assert r[1] == {
        'start': 23,
        'end': 31,
        'value': 'close-by',
        'entity': 'where'
    }