Example #1
def test_spacy_intent_featurizer(spacy_nlp_component: SpacyNLP,
                                 spacy_model: SpacyModel):
    td = loading.load_data("data/examples/rasa/demo-rasa.json")
    spacy_nlp_component.process_training_data(td, spacy_model)
    spacy_featurizer = create_spacy_featurizer({})
    spacy_featurizer.process_training_data(td)

    intent_features_exist = np.array([
        example.get("intent_features") is not None
        for example in td.intent_examples
    ])

    # no intent features should have been set
    assert not any(intent_features_exist)
Example #2
async def test_train_persist_with_different_configurations(
    crf_entity_extractor: Callable[[Dict[Text, Any]], CRFEntityExtractor],
    config_params: Dict[Text, Any],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    spacy_tokenizer: SpacyTokenizer,
    spacy_featurizer: SpacyFeaturizer,
    spacy_nlp_component: SpacyNLP,
    spacy_model: SpacyModel,
):

    crf_extractor = crf_entity_extractor(config_params)

    importer = RasaFileImporter(training_data_paths=["data/examples/rasa"])
    training_data = importer.get_nlu_data()

    training_data = spacy_nlp_component.process_training_data(
        training_data, spacy_model)
    training_data = spacy_tokenizer.process_training_data(training_data)
    training_data = spacy_featurizer.process_training_data(training_data)
    crf_extractor.train(training_data)

    message = Message(data={TEXT: "I am looking for an italian restaurant"})
    messages = spacy_nlp_component.process([message], spacy_model)
    messages = spacy_tokenizer.process(messages)
    message = spacy_featurizer.process(messages)[0]
    message2 = copy.deepcopy(message)

    processed_message = crf_extractor.process([message])[0]

    loaded_extractor = CRFEntityExtractor.load(
        {
            **CRFEntityExtractor.get_default_config(),
            **config_params
        },
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )

    processed_message2 = loaded_extractor.process([message2])[0]

    assert processed_message2.fingerprint() == processed_message.fingerprint()

    detected_entities = processed_message2.get(ENTITIES)

    assert len(detected_entities) == 1
    assert detected_entities[0]["entity"] == "cuisine"
    assert detected_entities[0]["value"] == "italian"
Example #3
def test_spacy_preprocessor_process_training_data(
        spacy_nlp_component: SpacyNLP, spacy_model: SpacyModel):
    training_data = TrainingDataImporter.load_from_dict(training_data_paths=[
        "data/test_e2ebot/data/nlu.yml",
        "data/test_e2ebot/data/stories.yml",
    ]).get_nlu_data()

    spacy_nlp_component.process_training_data(training_data, spacy_model)

    for message in training_data.training_examples:
        for attr in DENSE_FEATURIZABLE_ATTRIBUTES:
            attr_text = message.data.get(attr)
            if attr_text:
                doc = message.data[SPACY_DOCS[attr]]
                assert isinstance(doc, spacy.tokens.doc.Doc)
                assert doc.text == attr_text.lower()
Example #4
def test_spacy_training_sample_alignment(spacy_nlp_component: SpacyNLP,
                                         spacy_model: SpacyModel):
    from spacy.tokens import Doc

    m1 = Message.build(text="I have a feeling", intent="feeling")
    m2 = Message.build(text="", intent="feeling")
    m3 = Message.build(text="I am the last message", intent="feeling")
    td = TrainingData(training_examples=[m1, m2, m3])

    attribute_docs = spacy_nlp_component._docs_for_training_data(
        spacy_model.model, td)

    assert isinstance(attribute_docs["text"][0], Doc)
    assert isinstance(attribute_docs["text"][1], Doc)
    assert isinstance(attribute_docs["text"][2], Doc)

    assert [t.text for t in attribute_docs["text"][0]] == [
        "i",
        "have",
        "a",
        "feeling",
    ]
    assert [t.text for t in attribute_docs["text"][1]] == []
    assert [t.text for t in attribute_docs["text"][2]] == [
        "i",
        "am",
        "the",
        "last",
        "message",
    ]
Example #5
def create_spacy_nlp_component(
    model_name: Text = "en_core_web_md", case_sensitive: Optional[bool] = None
) -> SpacyNLP:
    component = SpacyNLP.create(
        {"model": model_name, "case_sensitive": case_sensitive}, None, None, None
    )
    return component
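The helper above only constructs the component. A hedged usage sketch, combining it with the provide() call from Example #9 and the spacy_model.model attribute used in Example #4; it assumes the en_core_web_md model is installed, and the variable names are illustrative:

component = create_spacy_nlp_component(case_sensitive=False)
spacy_model = component.provide()         # SpacyModel wrapping the loaded pipeline
doc = spacy_model.model("Hello World")    # call the underlying spaCy Language object
print([token.text for token in doc])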
Example #6
def test_persist_and_load(
    training_data: TrainingData,
    default_sklearn_intent_classifier: SklearnIntentClassifier,
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    train_and_preprocess: Callable[..., Tuple[TrainingData, List[GraphComponent]]],
    spacy_nlp_component: SpacyNLP,
    spacy_model: SpacyModel,
):
    training_data = spacy_nlp_component.process_training_data(
        training_data, spacy_model
    )

    training_data, loaded_pipeline = train_and_preprocess(
        pipeline=[{"component": SpacyTokenizer}, {"component": SpacyFeaturizer}],
        training_data=training_data,
    )
    default_sklearn_intent_classifier.train(training_data)

    loaded = SklearnIntentClassifier.load(
        SklearnIntentClassifier.get_default_config(),
        default_model_storage,
        Resource("sklearn"),
        default_execution_context,
    )

    predicted = copy.deepcopy(training_data)
    actual = copy.deepcopy(training_data)
    loaded_messages = loaded.process(predicted.training_examples)
    trained_messages = default_sklearn_intent_classifier.process(
        actual.training_examples
    )

    for m1, m2 in zip(loaded_messages, trained_messages):
        assert m1.get("intent") == m2.get("intent")
Example #7
File: ner.py Project: ash-pramila/chiron
    def __init__(self, component_config=None, matcher=None):
        super().__init__(component_config)

        # Use the configured spaCy model if one is given; otherwise fall
        # back to a blank English pipeline. Guard against a missing config.
        spacy_model_name = (component_config or {}).get("model")

        if spacy_model_name:
            self.spacy_nlp = SpacyNLP.load_model(spacy_model_name)
        else:
            self.spacy_nlp = spacy.blank('en')

        # Reuse an existing Matcher (e.g. when loading a persisted model);
        # otherwise start with an empty one bound to the pipeline's vocab.
        if matcher:
            self.matcher = matcher
        else:
            self.matcher = Matcher(self.spacy_nlp.vocab)
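The constructor above only wires up a Matcher; registering patterns and running them is not shown. Below is a minimal, self-contained sketch of the underlying spaCy 3 Matcher API; the CUISINE label and the patterns are purely illustrative and not taken from the chiron project:

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)
# Register token patterns under a hypothetical label.
matcher.add("CUISINE", [[{"LOWER": "italian"}], [{"LOWER": "chinese"}]])

doc = nlp("I am looking for an italian restaurant")
for match_id, start, end in matcher(doc):
    print(nlp.vocab.strings[match_id], doc[start:end].text)  # CUISINE italian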
Example #8
def test_process_unfeaturized_input(
    training_data: TrainingData,
    default_sklearn_intent_classifier: SklearnIntentClassifier,
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    train_and_preprocess: Callable[..., Tuple[TrainingData,
                                              List[GraphComponent]]],
    spacy_nlp_component: SpacyNLP,
    spacy_model: SpacyModel,
):
    training_data = spacy_nlp_component.process_training_data(
        training_data, spacy_model)
    training_data, loaded_pipeline = train_and_preprocess(
        pipeline=[{"component": SpacyTokenizer}, {"component": SpacyFeaturizer}],
        training_data=training_data,
    )
    default_sklearn_intent_classifier.train(training_data)
    classifier = SklearnIntentClassifier.load(
        SklearnIntentClassifier.get_default_config(),
        default_model_storage,
        Resource("sklearn"),
        default_execution_context,
    )
    message_text = "message text"
    message = Message(data={TEXT: message_text})
    processed_message = classifier.process([message])[0]

    assert processed_message.get(TEXT) == message_text
    assert not processed_message.get(INTENT)
Example #9
File: conftest.py Project: FGA-GCES/rasa
@pytest.fixture()
def spacy_model(spacy_nlp_component: SpacyNLP) -> SpacyModel:
    return spacy_nlp_component.provide()
Example #10
File: conftest.py Project: FGA-GCES/rasa
@pytest.fixture()
def spacy_nlp_component() -> SpacyNLP:
    return SpacyNLP.create({"model": "en_core_web_md"}, Mock(), Mock(), Mock())
Example #11
def test_model_raises_error_not_exist():
    """It should throw a direct error when a model doesn't exist."""
    with pytest.raises(InvalidModelError):
        SpacyNLP.create({"model": "dinosaurhead"}, RasaNLUModelConfig())
Example #12
def test_model_fallback_raises_warning(lang: str):
    """Make sure we raise a warning but we will perform a fallback."""
    with pytest.warns(FutureWarning):
        SpacyNLP._check_model_fallback(None, lang, warn=True)
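As written, the test above takes a lang argument but no pytest.mark.parametrize decorator is shown, so pytest would report lang as a missing fixture. A hedged sketch of how it could be parametrized; the language codes are illustrative and the _check_model_fallback call is reused unchanged from the snippet above:

import pytest

@pytest.mark.parametrize("lang", ["en", "de"])
def test_model_fallback_raises_warning(lang: str):
    """A warning should be raised, but the model fallback should still be performed."""
    with pytest.warns(FutureWarning):
        SpacyNLP._check_model_fallback(None, lang, warn=True)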