Beispiel #1
0
def test_crf_json_from_non_BILOU(spacy_nlp):
    """Convert CRF predictions to JSON entities with BILOU tagging disabled.

    With ``BILOU_flag`` off the tagged spans "home cleaning" and "close-by"
    are expected to come back split into five separate entities.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    extractor = CRFEntityExtractor()
    extractor.BILOU_flag = False

    text = u"I need a home cleaning close-by"
    message = Message(text, {"spacy_doc": spacy_nlp(text)})

    # Each prediction maps a label to its probability; every label is given
    # probability 1.0 here.
    labels = ['O', 'O', 'O', 'what', 'what', 'where', 'where', 'where']
    predictions = [{label: 1.0} for label in labels]
    rs = extractor._from_crf_to_json(message, predictions)

    # non BILOU will split multi-word entities - hence 5
    assert len(rs) == 5, "There should be five entities"

    # Each entity must carry a truthy confidence; it is removed afterwards so
    # the remaining fields can be compared exactly.
    for found in rs:
        assert found['confidence']
        del found['confidence']

    expected = [
        (9, 13, 'home', 'what'),
        (14, 22, 'cleaning', 'what'),
        (23, 28, 'close', 'where'),
        (28, 29, '-', 'where'),
        (29, 31, 'by', 'where'),
    ]
    for found, (start, end, value, entity) in zip(rs, expected):
        assert found == {'start': start, 'end': end,
                         'value': value, 'entity': entity}
def test_crf_json_from_BILOU(spacy_nlp):
    """Convert BILOU-tagged CRF predictions to JSON entities.

    The B-/I-/L- tagged runs are expected to be merged into two multi-token
    entities: "home cleaning" and "close-by".
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    text = u"I need a home cleaning close-by"
    extractor = CRFEntityExtractor()
    extractor.BILOU_flag = True

    tags = ['O', 'O', 'O',
            'B-what', 'L-what',
            'B-where', 'I-where', 'L-where']
    r = extractor._from_crf_to_json(
        Message(text, {"spacy_doc": spacy_nlp(text)}),
        [{tag: 1.0} for tag in tags])
    assert len(r) == 2, "There should be two entities"

    expected = [
        {'start': 9, 'end': 22,
         'value': 'home cleaning', 'entity': 'what'},
        {'start': 23, 'end': 31,
         'value': 'close-by', 'entity': 'where'},
    ]
    for found, wanted in zip(r, expected):
        # A truthy confidence must be present; remove it before the exact
        # comparison of the remaining fields.
        assert found["confidence"]
        del found["confidence"]
        assert found == wanted
Beispiel #3
0
def test_crf_json_from_BILOU(spacy_nlp):
    """Check that BILOU-tagged predictions collapse into two entities."""
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    extractor = CRFEntityExtractor()
    extractor.BILOU_flag = True

    text = u"I need a home cleaning close-by"
    message = Message(text, {"spacy_doc": spacy_nlp(text)})
    predictions = [
        {tag: 1.0}
        for tag in ('O', 'O', 'O',
                    'B-what', 'L-what',
                    'B-where', 'I-where', 'L-where')
    ]
    r = extractor._from_crf_to_json(message, predictions)
    assert len(r) == 2, "There should be two entities"

    expected = (
        (9, 22, 'home cleaning', 'what'),
        (23, 31, 'close-by', 'where'),
    )
    for index, (start, end, value, entity) in enumerate(expected):
        found = r[index]
        assert found["confidence"]  # confidence should exist
        del found["confidence"]
        assert found == {'start': start, 'end': end,
                         'value': value, 'entity': entity}
def test_crf_json_from_non_BILOU(spacy_nlp):
    """Convert CRF predictions to JSON with BILOU disabled via the config.

    The extractor is built with ``component_config={"BILOU_flag": False}``;
    five single-token entities are expected back.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    extractor = CRFEntityExtractor(component_config={"BILOU_flag": False})

    text = u"I need a home cleaning close-by"
    message = Message(text, {"spacy_doc": spacy_nlp(text)})
    labels = ('O', 'O', 'O', 'what', 'what', 'where', 'where', 'where')
    rs = extractor._from_crf_to_json(message,
                                     [{label: 1.0} for label in labels])

    # non BILOU will split multi-word entities - hence 5
    assert len(rs) == 5, "There should be five entities"

    # Confidence has to be present (and truthy) on every entity; strip it so
    # the positional comparison below only sees the stable fields.
    for found in rs:
        assert found['confidence']
        del found['confidence']

    expected = [
        {'start': 9, 'end': 13, 'value': 'home', 'entity': 'what'},
        {'start': 14, 'end': 22, 'value': 'cleaning', 'entity': 'what'},
        {'start': 23, 'end': 28, 'value': 'close', 'entity': 'where'},
        {'start': 28, 'end': 29, 'value': '-', 'entity': 'where'},
        {'start': 29, 'end': 31, 'value': 'by', 'entity': 'where'},
    ]
    for position, wanted in enumerate(expected):
        assert rs[position] == wanted
Beispiel #5
0
def test_crf_json_from_BILOU(spacy_nlp):
    """Convert plain BILOU tag strings to JSON entities.

    This variant hands the spaCy document straight to
    ``_from_crf_to_json`` and passes one tag string per prediction.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    extractor = CRFEntityExtractor()
    extractor.BILOU_flag = True

    text = u"I need a home cleaning close-by"
    tags = ['O', 'O', 'O',
            'B-what', 'L-what',
            'B-where', 'I-where', 'L-where']
    r = extractor._from_crf_to_json(spacy_nlp(text), tags)

    assert len(r) == 2, "There should be two entities"
    expected = [
        {u'start': 9, u'end': 22,
         u'value': u'home cleaning', u'entity': u'what'},
        {u'start': 23, u'end': 31,
         u'value': u'close-by', u'entity': u'where'},
    ]
    for position, wanted in enumerate(expected):
        assert r[position] == wanted
def test_crf_json_from_BILOU(spacy_nlp):
    """Convert plain BILOU tag strings to JSON entities via a ``Message``.

    The spaCy document is attached to the message under ``"spacy_doc"``.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    extractor = CRFEntityExtractor()
    extractor.BILOU_flag = True

    text = u"I need a home cleaning close-by"
    message = Message(text, {"spacy_doc": spacy_nlp(text)})
    tags = ['O', 'O', 'O',
            'B-what', 'L-what',
            'B-where', 'I-where', 'L-where']
    r = extractor._from_crf_to_json(message, tags)

    assert len(r) == 2, "There should be two entities"
    expected = (
        (9, 22, u'home cleaning', u'what'),
        (23, 31, u'close-by', u'where'),
    )
    for found, (start, end, value, entity) in zip(r, expected):
        assert found == {u'start': start, u'end': end,
                         u'value': value, u'entity': entity}
Beispiel #7
0
def test_crf_json_from_non_BILOU(spacy_nlp):
    """Convert plain (non-BILOU) tag strings to JSON entities.

    This variant hands the spaCy document straight to
    ``_from_crf_to_json``; five single-token entities are expected.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    extractor = CRFEntityExtractor()
    extractor.BILOU_flag = False

    text = u"I need a home cleaning close-by"
    tags = ['O', 'O', 'O', 'what', 'what', 'where', 'where', 'where']
    r = extractor._from_crf_to_json(spacy_nlp(text), tags)

    # non BILOU will split multi-word entities - hence 5
    assert len(r) == 5, "There should be five entities"

    expected = [
        (9, 13, u'home', u'what'),
        (14, 22, u'cleaning', u'what'),
        (23, 28, u'close', u'where'),
        (28, 29, u'-', u'where'),
        (29, 31, u'by', u'where'),
    ]
    for found, (start, end, value, entity) in zip(r, expected):
        assert found == {u'start': start, u'end': end,
                         u'value': value, u'entity': entity}
def test_crf_json_from_non_BILOU(spacy_nlp):
    """Convert plain (non-BILOU) tag strings to JSON via a ``Message``.

    The spaCy document is attached to the message under ``"spacy_doc"``;
    five single-token entities are expected.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    extractor = CRFEntityExtractor()
    extractor.BILOU_flag = False

    text = u"I need a home cleaning close-by"
    message = Message(text, {"spacy_doc": spacy_nlp(text)})
    tags = ['O', 'O', 'O', 'what', 'what', 'where', 'where', 'where']
    r = extractor._from_crf_to_json(message, tags)

    # non BILOU will split multi-word entities - hence 5
    assert len(r) == 5, "There should be five entities"

    expected = [
        {u'start': 9, u'end': 13, u'value': u'home', u'entity': u'what'},
        {u'start': 14, u'end': 22, u'value': u'cleaning', u'entity': u'what'},
        {u'start': 23, u'end': 28, u'value': u'close', u'entity': u'where'},
        {u'start': 28, u'end': 29, u'value': u'-', u'entity': u'where'},
        {u'start': 29, u'end': 31, u'value': u'by', u'entity': u'where'},
    ]
    for position, wanted in enumerate(expected):
        assert r[position] == wanted
def test_crf_extractor(spacy_nlp):
    """Train the CRF extractor on two examples and inspect its features.

    Checks the token texts produced by ``_from_text_to_crf``, the BOS/EOS
    markers and lower-cased token feature from ``_sentence_to_features``,
    and finally that ``extract_entities`` runs on a parsed message.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    def training_message(text, entities):
        # Annotated example with its spaCy parse attached.
        return Message(text, {
            "intent": "restaurant_search",
            "entities": entities,
            "spacy_doc": spacy_nlp(text)
        })

    def query_message(text):
        # Un-annotated message carrying only the spaCy parse.
        return Message(text, {"spacy_doc": spacy_nlp(text)})

    ext = CRFEntityExtractor()
    examples = [
        training_message("anywhere in the west", [
            {"start": 16, "end": 20, "value": "west", "entity": "location"}
        ]),
        training_message("central indian restaurant", [
            {"start": 0, "end": 7, "value": "central", "entity": "location"}
        ]),
    ]
    config = {"ner_crf": {"BILOU_flag": True, "features": ext.crf_features}}
    ext.train(TrainingData(training_examples=examples), config)

    sentence = 'anywhere in the west'
    crf_format = ext._from_text_to_crf(query_message(sentence))
    token_texts = [token[0] for token in crf_format]
    assert token_texts == ['anywhere', 'in', 'the', 'west']

    feats = ext._sentence_to_features(crf_format)
    assert 'BOS' in feats[0]
    assert 'EOS' in feats[-1]
    assert feats[1]['0:low'] == "in"

    ext.extract_entities(query_message(sentence))
Beispiel #10
0
def test_crf_extractor(spacy_nlp):
    """Exercise training, feature extraction and entity extraction.

    Two annotated restaurant-search messages are used for training; the
    test then verifies the CRF token format and selected features for the
    sentence "anywhere in the west".
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    def parsed(text):
        # Message that only carries the spaCy document for ``text``.
        return Message(text, {"spacy_doc": spacy_nlp(text)})

    ext = CRFEntityExtractor()

    west_example = Message("anywhere in the west", {
        "intent": "restaurant_search",
        "entities": [{"start": 16, "end": 20,
                      "value": "west", "entity": "location"}],
        "spacy_doc": spacy_nlp("anywhere in the west")
    })
    central_example = Message("central indian restaurant", {
        "intent": "restaurant_search",
        "entities": [{"start": 0, "end": 7,
                      "value": "central", "entity": "location"}],
        "spacy_doc": spacy_nlp("central indian restaurant")
    })
    examples = [west_example, central_example]

    config = {"ner_crf": {"BILOU_flag": True, "features": ext.crf_features}}
    ext.train(TrainingData(training_examples=examples), config)

    sentence = 'anywhere in the west'
    crf_format = ext._from_text_to_crf(parsed(sentence))
    assert [item[0] for item in crf_format] == ['anywhere', 'in',
                                                'the', 'west']

    feats = ext._sentence_to_features(crf_format)
    first_feats, second_feats, last_feats = feats[0], feats[1], feats[-1]
    assert 'BOS' in first_feats
    assert 'EOS' in last_feats
    assert second_feats['0:low'] == "in"

    ext.extract_entities(parsed(sentence))
Beispiel #11
0
def test_crf_extractor(spacy_nlp, ner_crf_pos_feature_config):
    """Train the CRF extractor and check features and entity filtering.

    Besides the feature checks, this verifies that
    ``filter_trainable_entities`` keeps entities without an ``extractor``
    key and those attributed to ``ner_crf``, drops the one attributed to
    ``random_extractor``, and leaves the original examples untouched.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    def training_message(text, entities):
        # Annotated example with its spaCy parse attached.
        return Message(text, {
            "intent": "restaurant_search",
            "entities": entities,
            "spacy_doc": spacy_nlp(text)
        })

    ext = CRFEntityExtractor(component_config=ner_crf_pos_feature_config)
    examples = [
        training_message("anywhere in the west", [
            {"start": 16, "end": 20,
             "value": "west", "entity": "location"}
        ]),
        training_message("central indian restaurant", [
            {"start": 0, "end": 7, "value": "central",
             "entity": "location", "extractor": "random_extractor"},
            {"start": 8, "end": 14, "value": "indian",
             "entity": "cuisine", "extractor": "ner_crf"}
        ]),
    ]

    # uses BILOU and the default features
    ext.train(TrainingData(training_examples=examples), RasaNLUModelConfig())

    sentence = 'anywhere in the west'
    crf_format = ext._from_text_to_crf(
        Message(sentence, {"spacy_doc": spacy_nlp(sentence)}))
    token_texts = [token[0] for token in crf_format]
    assert token_texts == ['anywhere', 'in', 'the', 'west']

    feats = ext._sentence_to_features(crf_format)
    assert 'BOS' in feats[0]
    assert 'EOS' in feats[-1]
    assert feats[1]['0:low'] == "in"

    ext.extract_entities(Message(sentence, {"spacy_doc": spacy_nlp(sentence)}))

    # Expected values are fresh literals on purpose: sharing dict objects
    # with ``examples`` would hide an in-place mutation.
    filtered = ext.filter_trainable_entities(examples)
    kept_first = filtered[0].get('entities')
    kept_second = filtered[1].get('entities')
    assert kept_first == [
        {"start": 16, "end": 20, "value": "west", "entity": "location"}
    ], 'Entity without extractor remains'
    assert kept_second == [
        {"start": 8, "end": 14,
         "value": "indian", "entity": "cuisine", "extractor": "ner_crf"}
    ], 'Only ner_crf entity annotation remains'

    original_first_entity = examples[1].get('entities')[0]
    assert original_first_entity == {
        "start": 0, "end": 7,
        "value": "central", "entity": "location",
        "extractor": "random_extractor"
    }, 'Original examples are not mutated'
Beispiel #12
0
def test_crf_extractor(spacy_nlp, ner_crf_pos_feature_config):
    """Train the CRF extractor and check features and entity filtering.

    The second training example carries one entity attributed to
    ``random_extractor`` and one attributed to ``CRFEntityExtractor``;
    ``filter_trainable_entities`` is expected to keep only the latter
    while leaving the input examples unmodified.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    def parsed(text):
        # Message that only carries the spaCy document for ``text``.
        return Message(text, {"spacy_doc": spacy_nlp(text)})

    ext = CRFEntityExtractor(component_config=ner_crf_pos_feature_config)

    west_example = Message("anywhere in the west", {
        "intent": "restaurant_search",
        "entities": [{"start": 16, "end": 20,
                      "value": "west", "entity": "location"}],
        "spacy_doc": spacy_nlp("anywhere in the west")
    })
    central_example = Message("central indian restaurant", {
        "intent": "restaurant_search",
        "entities": [
            {"start": 0, "end": 7, "value": "central",
             "entity": "location", "extractor": "random_extractor"},
            {"start": 8, "end": 14, "value": "indian",
             "entity": "cuisine", "extractor": "CRFEntityExtractor"}
        ],
        "spacy_doc": spacy_nlp("central indian restaurant")
    })
    examples = [west_example, central_example]

    # uses BILOU and the default features
    ext.train(TrainingData(training_examples=examples), RasaNLUModelConfig())

    sentence = 'anywhere in the west'
    crf_format = ext._from_text_to_crf(parsed(sentence))
    assert [item[0] for item in crf_format] == ['anywhere', 'in',
                                                'the', 'west']

    feats = ext._sentence_to_features(crf_format)
    assert 'BOS' in feats[0]
    assert 'EOS' in feats[-1]
    assert feats[1]['0:low'] == "in"

    ext.extract_entities(parsed(sentence))

    # Expected values are written out as new literals so that an in-place
    # mutation of the training examples cannot mask a failure.
    filtered = ext.filter_trainable_entities(examples)
    first_filtered, second_filtered = filtered[0], filtered[1]
    assert first_filtered.get('entities') == [
        {"start": 16, "end": 20, "value": "west", "entity": "location"}
    ], 'Entity without extractor remains'
    assert second_filtered.get('entities') == [
        {"start": 8, "end": 14,
         "value": "indian", "entity": "cuisine",
         "extractor": "CRFEntityExtractor"}
    ], 'Only CRFEntityExtractor entity annotation remains'
    assert examples[1].get('entities')[0] == {
        "start": 0, "end": 7,
        "value": "central", "entity": "location",
        "extractor": "random_extractor"
    }, 'Original examples are not mutated'
Beispiel #13
0
def test_crf_extractor(spacy_nlp):
    """Train the CRF extractor from plain dict examples and check features.

    This variant passes the examples as ``entity_examples_only`` and hands
    ``spacy_nlp`` directly to ``train``, ``_from_text_to_crf`` and
    ``extract_entities``.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    def location_example(text, start, end, value):
        # One restaurant-search example with a single "location" entity.
        return {
            "text": text,
            "intent": "restaurant_search",
            "entities": [{
                "start": start,
                "end": end,
                "value": value,
                "entity": "location"
            }]
        }

    ext = CRFEntityExtractor()
    examples = [
        location_example("anywhere in the west", 16, 20, "west"),
        location_example("central indian restaurant", 0, 7, "central"),
    ]
    ext.train(TrainingData(entity_examples_only=examples), spacy_nlp, True,
              ext.crf_features)

    query = 'anywhere in the west'
    crf_format = ext._from_text_to_crf(query, spacy_nlp)
    token_texts = [token[0] for token in crf_format]
    assert token_texts == ['anywhere', 'in', 'the', 'west']

    feats = ext._sentence_to_features(crf_format)
    assert 'BOS' in feats[0]
    assert 'EOS' in feats[-1]
    assert '0:low:in' in feats[1]

    ext.extract_entities(query, spacy_nlp)
Beispiel #14
0
def test_crf_extractor(spacy_nlp):
    """Train the CRF extractor from plain dict examples and check features.

    This variant passes the examples as ``training_examples`` and hands
    ``spacy_nlp`` directly to ``train``, ``_from_text_to_crf`` and
    ``extract_entities``.
    """
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    ext = CRFEntityExtractor()

    # (text, start, end, value) for each single-entity training example.
    annotations = [
        ("anywhere in the west", 16, 20, "west"),
        ("central indian restaurant", 0, 7, "central"),
    ]
    examples = [
        {
            "text": text,
            "intent": "restaurant_search",
            "entities": [{"start": start, "end": end,
                          "value": value, "entity": "location"}]
        }
        for text, start, end, value in annotations
    ]
    ext.train(TrainingData(training_examples=examples), spacy_nlp, True,
              ext.crf_features)

    query = 'anywhere in the west'
    crf_format = ext._from_text_to_crf(query, spacy_nlp)
    assert [item[0] for item in crf_format] == ['anywhere', 'in',
                                                'the', 'west']

    feats = ext._sentence_to_features(crf_format)
    first_feats, second_feats, last_feats = feats[0], feats[1], feats[-1]
    assert 'BOS' in first_feats
    assert 'EOS' in last_feats
    assert '0:low:in' in second_feats

    ext.extract_entities(query, spacy_nlp)
Beispiel #15
0
from rasa_nlu.train import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.utils.spacy_utils import SpacyNLP
from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor
import spacy

# Exploratory script: load the demo training data, run the spaCy components'
# ``train`` step over it, and print what the CRF extractor's
# ``_convert_example`` returns for each trainable example.

# Default model configuration, shared by both ``train`` calls below.
config = RasaNLUModelConfig()
training_data = load_data("data/examples/rasa/demo-rasa.json")

# NOTE(review): presumably these calls attach spaCy docs / tokens to every
# training example in place - confirm against SpacyNLP / SpacyTokenizer.
SpacyNLP(nlp=spacy.load("en")).train(training_data, config)
SpacyTokenizer().train(training_data, config)

# Dump one example (index 25) as a dict for manual inspection.
print(training_data.training_examples[25].as_dict())

crf = CRFEntityExtractor()
# Filter the examples through the extractor before converting them.
filtered_data = crf.filter_trainable_entities(training_data.training_examples)

# Create the dataset in one step (disabled; the conversion is done by hand
# below instead).

# dataset = crf._create_dataset(filtered_data)

# Convert the examples manually.

# NOTE: ``dataset`` is initialised here but never filled by the loop below.
dataset = []

# Convert each JSON example towards CRF format and print the intermediate
# result of ``_convert_example``.
for training_example in filtered_data:
    entity_offsets = crf._convert_example(training_example)
    print("Entity Offset", entity_offsets)
    # Disabled follow-up step: turn the example plus its offsets into CRF
    # format and print it.
    # b = crf._from_json_to_crf(training_example, entity_offsets)
    # print("JSON to CRF", b)