def test_spacy_intent_featurizer(
    spacy_nlp_component: SpacyNLP, spacy_model: SpacyModel
):
    """The spaCy featurizer must not attach features to the intent attribute."""
    td = loading.load_data("data/examples/rasa/demo-rasa.json")
    spacy_nlp_component.process_training_data(td, spacy_model)

    spacy_featurizer = create_spacy_featurizer({})
    spacy_featurizer.process_training_data(td)

    # Idiom fix: `True if x is not None else False` is just `x is not None`.
    intent_features_exist = np.array(
        [
            example.get("intent_features") is not None
            for example in td.intent_examples
        ]
    )

    # no intent features should have been set
    assert not any(intent_features_exist)
async def test_train_persist_with_different_configurations(
    crf_entity_extractor: Callable[[Dict[Text, Any]], CRFEntityExtractor],
    config_params: Dict[Text, Any],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    spacy_tokenizer: SpacyTokenizer,
    spacy_featurizer: SpacyFeaturizer,
    spacy_nlp_component: SpacyNLP,
    spacy_model: SpacyModel,
):
    """Train a CRF extractor, reload it from storage, and check that the
    loaded extractor produces identical entity predictions."""
    extractor = crf_entity_extractor(config_params)

    # Build the training data through the spaCy preprocessing chain.
    importer = RasaFileImporter(training_data_paths=["data/examples/rasa"])
    data = importer.get_nlu_data()
    data = spacy_nlp_component.process_training_data(data, spacy_model)
    data = spacy_tokenizer.process_training_data(data)
    data = spacy_featurizer.process_training_data(data)
    extractor.train(data)

    # Run a single message through the same preprocessing chain.
    msg = Message(data={TEXT: "I am looking for an italian restaurant"})
    batch = spacy_nlp_component.process([msg], spacy_model)
    batch = spacy_tokenizer.process(batch)
    prepared = spacy_featurizer.process(batch)[0]
    prepared_copy = copy.deepcopy(prepared)

    processed = extractor.process([prepared])[0]

    # Reload the persisted extractor and process an identical message copy.
    reloaded = CRFEntityExtractor.load(
        {**CRFEntityExtractor.get_default_config(), **config_params},
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )
    processed_by_reloaded = reloaded.process([prepared_copy])[0]

    # The reloaded model must behave exactly like the in-memory one.
    assert processed_by_reloaded.fingerprint() == processed.fingerprint()

    entities = processed_by_reloaded.get(ENTITIES)
    assert len(entities) == 1
    assert entities[0]["entity"] == "cuisine"
    assert entities[0]["value"] == "italian"
def test_spacy_preprocessor_process_training_data(
    spacy_nlp_component: SpacyNLP, spacy_model: SpacyModel
):
    """Processing training data attaches a lowercased spaCy ``Doc`` to every
    dense-featurizable attribute that has text."""
    importer = TrainingDataImporter.load_from_dict(
        training_data_paths=[
            "data/test_e2ebot/data/nlu.yml",
            "data/test_e2ebot/data/stories.yml",
        ]
    )
    training_data = importer.get_nlu_data()

    spacy_nlp_component.process_training_data(training_data, spacy_model)

    for message in training_data.training_examples:
        for attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
            text = message.data.get(attribute)
            if not text:
                continue
            doc = message.data[SPACY_DOCS[attribute]]
            assert isinstance(doc, spacy.tokens.doc.Doc)
            # SpacyNLP lowercases the text before parsing.
            assert doc.text == text.lower()
def test_spacy_training_sample_alignment(
    spacy_nlp_component: SpacyNLP, spacy_model: SpacyModel
):
    """Docs produced for training data must stay aligned with their messages,
    including an empty-text message in the middle of the batch."""
    from spacy.tokens import Doc

    messages = [
        Message.build(text="I have a feeling", intent="feeling"),
        Message.build(text="", intent="feeling"),
        Message.build(text="I am the last message", intent="feeling"),
    ]
    training_data = TrainingData(training_examples=messages)

    attribute_docs = spacy_nlp_component._docs_for_training_data(
        spacy_model.model, training_data
    )

    text_docs = attribute_docs["text"]
    for doc in text_docs[:3]:
        assert isinstance(doc, Doc)

    expected_tokens = [
        ["i", "have", "a", "feeling"],
        [],  # the empty message still yields a (token-less) Doc
        ["i", "am", "the", "last", "message"],
    ]
    for doc, expected in zip(text_docs, expected_tokens):
        assert [token.text for token in doc] == expected
def create_spacy_nlp_component(
    model_name: Text = "en_core_web_md", case_sensitive: Optional[bool] = None
) -> SpacyNLP:
    """Build a ``SpacyNLP`` component for the given model and case setting."""
    config = {"model": model_name, "case_sensitive": case_sensitive}
    return SpacyNLP.create(config, None, None, None)
def test_persist_and_load(
    training_data: TrainingData,
    default_sklearn_intent_classifier: SklearnIntentClassifier,
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    train_and_preprocess: Callable[..., Tuple[TrainingData, List[GraphComponent]]],
    spacy_nlp_component: SpacyNLP,
    spacy_model: SpacyModel,
):
    """A reloaded sklearn classifier must predict the same intents as the
    freshly trained one."""
    preprocessed = spacy_nlp_component.process_training_data(
        training_data, spacy_model
    )
    preprocessed, _pipeline = train_and_preprocess(
        pipeline=[{"component": SpacyTokenizer}, {"component": SpacyFeaturizer}],
        training_data=preprocessed,
    )

    default_sklearn_intent_classifier.train(preprocessed)

    restored = SklearnIntentClassifier.load(
        SklearnIntentClassifier.get_default_config(),
        default_model_storage,
        Resource("sklearn"),
        default_execution_context,
    )

    # Process deep copies so neither run mutates the other's messages.
    from_restored = restored.process(
        copy.deepcopy(preprocessed).training_examples
    )
    from_trained = default_sklearn_intent_classifier.process(
        copy.deepcopy(preprocessed).training_examples
    )

    for restored_msg, trained_msg in zip(from_restored, from_trained):
        assert restored_msg.get("intent") == trained_msg.get("intent")
def __init__(self, component_config=None, matcher=None):
    """Create the pattern-based NER component.

    Args:
        component_config: optional component configuration; the ``"model"``
            key selects the spaCy model to load. If omitted (or the key is
            missing), a blank English pipeline is used instead.
        matcher: optional pre-built ``spacy.matcher.Matcher``; when not
            given, a fresh matcher is created over the pipeline's vocab.
    """
    super(SpacyPatternNER, self).__init__(component_config)
    # Bug fix: the original dereferenced `component_config.get(...)` directly,
    # which raised AttributeError when the default `None` was used.
    spacy_model_name = (component_config or {}).get("model")
    if spacy_model_name:
        self.spacy_nlp = SpacyNLP.load_model(spacy_model_name)
    else:
        # Fall back to a blank English pipeline when no model is configured.
        self.spacy_nlp = spacy.blank('en')
    # Reuse an injected matcher (e.g. for tests); otherwise build one.
    self.matcher = matcher if matcher else Matcher(self.spacy_nlp.vocab)
def test_process_unfeaturized_input(
    training_data: TrainingData,
    default_sklearn_intent_classifier: SklearnIntentClassifier,
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    train_and_preprocess: Callable[..., Tuple[TrainingData, List[GraphComponent]]],
    spacy_nlp_component: SpacyNLP,
    spacy_model: SpacyModel,
):
    """A message without features passes through the classifier untouched:
    its text is preserved and no intent is set."""
    featurized = spacy_nlp_component.process_training_data(
        training_data, spacy_model
    )
    featurized, _pipeline = train_and_preprocess(
        pipeline=[{"component": SpacyTokenizer}, {"component": SpacyFeaturizer}],
        training_data=featurized,
    )

    default_sklearn_intent_classifier.train(featurized)

    restored_classifier = SklearnIntentClassifier.load(
        SklearnIntentClassifier.get_default_config(),
        default_model_storage,
        Resource("sklearn"),
        default_execution_context,
    )

    raw_text = "message text"
    result = restored_classifier.process([Message(data={TEXT: raw_text})])[0]

    assert result.get(TEXT) == raw_text
    assert not result.get(INTENT)
def spacy_model(spacy_nlp_component: SpacyNLP) -> SpacyModel:
    """Provide the ``SpacyModel`` backing the ``SpacyNLP`` component."""
    model = spacy_nlp_component.provide()
    return model
def spacy_nlp_component() -> SpacyNLP:
    """Create a ``SpacyNLP`` component for the medium English model.

    The storage/resource/context arguments are irrelevant here, so mocks
    are passed in their place.
    """
    config = {"model": "en_core_web_md"}
    return SpacyNLP.create(config, Mock(), Mock(), Mock())
def test_model_raises_error_not_exist():
    """It should throw a direct error when a model doesn't exist."""
    bogus_config = {"model": "dinosaurhead"}
    with pytest.raises(InvalidModelError):
        SpacyNLP.create(bogus_config, RasaNLUModelConfig())
def test_model_fallback_raises_warning(lang: str):
    """Make sure we raise a warning but we will perform a fallback."""
    with pytest.warns(FutureWarning):
        # `warn=True` forces the fallback path to emit its deprecation warning.
        SpacyNLP._check_model_fallback(None, lang, warn=True)