def test_spacy_featurizer(sentence, expected, spacy_nlp):
    """Check the features the spaCy featurizer produces for one sentence.

    Two things are verified, both with an absolute tolerance of 1e-5:

    * the first five components of the spaCy document vector match
      ``expected``;
    * ``features_for_doc`` returns the document vector itself.

    NOTE(review): ``sentence`` / ``expected`` are presumably supplied by a
    parametrization and ``spacy_nlp`` by a fixture; neither is visible in
    this part of the file.
    """
    from rasa.nlu.featurizers import spacy_featurizer

    parsed = spacy_nlp(sentence)
    features = spacy_featurizer.features_for_doc(parsed)

    # Only the leading five dimensions are pinned by the expected values.
    leading_components = parsed.vector[:5]
    assert np.allclose(leading_components, expected, atol=1e-5)

    # The featurizer output has to agree with the complete document vector.
    assert np.allclose(features, parsed.vector, atol=1e-5)
def test_spacy_featurizer_casing(spacy_nlp):
    """Check that capitalizing a text does not change its features.

    Every intent example of the demo training data is featurized twice,
    once as-is and once after ``str.capitalize()``; both feature vectors
    must agree within an absolute tolerance of 1e-5.

    If this starts failing for the default model, we should think about
    removing the lower casing the spacy nlp component does when it
    retrieves vectors. For compressed spacy models (e.g. models ending
    in _sm) this test will most likely fail.
    """
    from rasa.nlu.featurizers import spacy_featurizer

    examples = training_data.load_data(
        'data/examples/rasa/demo-rasa.json').intent_examples

    for example in examples:
        original_text = example.text
        capitalized_text = original_text.capitalize()

        # Parse both variants first, then featurize them.
        plain_doc = spacy_nlp(original_text)
        capitalized_doc = spacy_nlp(capitalized_text)
        plain_features = spacy_featurizer.features_for_doc(plain_doc)
        capitalized_features = spacy_featurizer.features_for_doc(
            capitalized_doc)

        assert np.allclose(plain_features, capitalized_features, atol=1e-5), (
            "Vectors are unequal for texts '{}' and '{}'".format(
                original_text, capitalized_text))