def test_spacy_featurizer(spacy_nlp_en, sentence, expected):
    """Check that ``SpacyFeaturizer`` reproduces the spaCy document vector.

    :param spacy_nlp_en: callable turning text into a spaCy document
        (presumably a fixture holding the English model -- confirm in conftest)
    :param sentence: text to run through the pipeline
    :param expected: reference values for the first five vector components
    """
    from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

    tolerance = 1e-5
    featurizer = SpacyFeaturizer(spacy_nlp_en)
    parsed = spacy_nlp_en(sentence)
    features = featurizer.features_for_doc(parsed)

    # The document vector itself must match the recorded reference values.
    leading_components = parsed.vector[:5]
    assert np.allclose(leading_components, expected, atol=tolerance)

    # The featurizer output must be the document vector, unchanged.
    assert np.allclose(features, parsed.vector, atol=tolerance)
def test_spacy_featurizer(sentence, expected, spacy_nlp):
    """Check that ``SpacyFeaturizer`` reproduces the spaCy document vector.

    :param sentence: text to run through the pipeline
    :param expected: reference values for the first five vector components
    :param spacy_nlp: callable turning text into a spaCy document
        (presumably a shared fixture -- confirm in conftest)
    """
    from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

    tolerance = 1e-5
    # The featurizer is built without arguments here; it only receives the
    # already-parsed document.
    featurizer = SpacyFeaturizer()
    parsed = spacy_nlp(sentence)
    features = featurizer.features_for_doc(parsed)

    # The document vector itself must match the recorded reference values.
    leading_components = parsed.vector[:5]
    assert np.allclose(leading_components, expected, atol=tolerance)

    # The featurizer output must be the document vector, unchanged.
    assert np.allclose(features, parsed.vector, atol=tolerance)
class SpacySklearnInterpreter(Interpreter):
    """Interpreter that parses text with spaCy and classifies intents with a
    pickled classifier."""

    def __init__(self, entity_extractor=None, entity_synonyms=None,
                 intent_classifier=None, language_name='en', **kwargs):
        """Load the language model and any persisted components.

        :param entity_extractor: location handed to ``SpacyEntityExtractor``;
            no extractor is created when falsy
        :param entity_synonyms: value handed to ``Interpreter.load_synonyms``
        :param intent_classifier: path of the pickled classifier; no
            classifier is loaded when falsy
        :param language_name: spaCy model name passed to ``spacy.load``
        :param kwargs: accepted and ignored
        """
        # Only the parts of the pipeline needed here are loaded.
        self.nlp = spacy.load(language_name, parser=False, entity=False,
                              matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        ensure_proper_language_model(self.nlp)

        self.classifier = None
        if intent_classifier:
            # NOTE(review): unpickling executes arbitrary code -- only load
            # classifier files from a trusted source.
            with open(intent_classifier, 'rb') as model_file:
                self.classifier = cloudpickle.load(model_file)

        self.extractor = (
            SpacyEntityExtractor(self.nlp, entity_extractor)
            if entity_extractor else None
        )
        self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_intent(self, doc):
        """Classify an already-parsed document.

        :param doc: document produced by ``self.nlp``
        :return: tuple of intent name and its probability, taken from the
            first entry of the classifier's prediction; ``("None", 0.0)``
            when no classifier is loaded
        """
        if not self.classifier:
            return "None", 0.0

        # The classifier expects a 2-D array holding a single sample.
        features = self.featurizer.features_for_doc(doc).reshape(1, -1)
        intent_ids, probabilities = self.classifier.predict(features)
        labels = self.classifier.transform_labels_num2str(intent_ids)
        return labels[0], probabilities[0]

    def get_entities(self, doc):
        """Return the entities extracted from ``doc``, or an empty list when
        no extractor is loaded.

        :param doc: document produced by ``self.nlp``
        """
        return self.extractor.extract_entities(doc) if self.extractor else []

    def parse(self, text):
        """Run the full pipeline on raw text.

        :param text: text to parse
        :return: dict with the keys ``text``, ``intent``, ``entities`` and
            ``confidence``
        """
        parsed = self.nlp(text)
        intent_name, confidence = self.get_intent(parsed)
        found_entities = self.get_entities(parsed)

        if self.ent_synonyms:
            Interpreter.replace_synonyms(found_entities, self.ent_synonyms)

        return {
            'text': text,
            'intent': intent_name,
            'entities': found_entities,
            'confidence': confidence,
        }
def test_spacy_featurizer(sentence, language, expected):
    """Check that ``SpacyFeaturizer`` reproduces the spaCy document vector.

    :param sentence: text to run through the pipeline
    :param language: spaCy model name passed to ``spacy.load``
    :param expected: reference values for the first five vector components
    """
    import spacy
    from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

    tolerance = 1e-5
    # Tagger and parser are switched off when loading the model.
    pipeline = spacy.load(language, tagger=False, parser=False)
    featurizer = SpacyFeaturizer(pipeline)
    parsed = pipeline(sentence)
    features = featurizer.features_for_doc(parsed)

    # The document vector itself must match the recorded reference values.
    leading_components = parsed.vector[:5]
    assert np.allclose(leading_components, expected, atol=tolerance)

    # The featurizer output must be the document vector, unchanged.
    assert np.allclose(features, parsed.vector, atol=tolerance)
class SpacySklearnInterpreter(Interpreter):
    """Interpreter that parses text with spaCy and classifies intents with a
    pickled classifier."""

    @staticmethod
    def load(meta, nlp):
        """Build an interpreter from persisted model metadata.

        Each component is only loaded when its path on ``meta`` is truthy;
        otherwise ``None`` is passed to the constructor for it.

        :param meta: object exposing ``entity_extractor_path``,
            ``intent_classifier_path`` and ``entity_synonyms_path``
        :type meta: ModelMetadata
        :param nlp: loaded spaCy language model
        :rtype: SpacySklearnInterpreter
        """
        extractor = (
            SpacyEntityExtractor(nlp, meta.entity_extractor_path)
            if meta.entity_extractor_path else None
        )

        classifier = None
        if meta.intent_classifier_path:
            # NOTE(review): unpickling executes arbitrary code -- only load
            # classifier files from a trusted source.
            with open(meta.intent_classifier_path, 'rb') as model_file:
                classifier = cloudpickle.load(model_file)

        synonyms = (
            Interpreter.load_synonyms(meta.entity_synonyms_path)
            if meta.entity_synonyms_path else None
        )

        return SpacySklearnInterpreter(
            intent_classifier=classifier,
            entity_extractor=extractor,
            entity_synonyms=synonyms,
            nlp=nlp,
        )

    def __init__(self, intent_classifier=None, entity_extractor=None,
                 entity_synonyms=None, nlp=None):
        """Store the already-loaded components.

        :param intent_classifier: classifier object, or ``None``
        :param entity_extractor: entity extractor object, or ``None``
        :param entity_synonyms: synonym data later handed to
            ``Interpreter.replace_synonyms``, or ``None``
        :param nlp: spaCy language model; checked with
            ``ensure_proper_language_model``
        """
        self.nlp = nlp
        self.classifier = intent_classifier
        self.extractor = entity_extractor
        self.ent_synonyms = entity_synonyms
        self.featurizer = SpacyFeaturizer(nlp)
        ensure_proper_language_model(nlp)

    def get_intent(self, doc):
        """Classify an already-parsed document.

        :param doc: document produced by ``self.nlp``
        :return: tuple of intent name and its probability, taken from the
            first entry of the classifier's prediction; ``("None", 0.0)``
            when no classifier is set
        """
        if not self.classifier:
            return "None", 0.0

        # The classifier expects a 2-D array holding a single sample.
        features = self.featurizer.features_for_doc(doc).reshape(1, -1)
        intent_ids, probabilities = self.classifier.predict(features)
        labels = self.classifier.transform_labels_num2str(intent_ids)
        return labels[0], probabilities[0]

    def get_entities(self, doc):
        """Return the entities extracted from ``doc``, or an empty list when
        no extractor is set.

        :param doc: document produced by ``self.nlp``
        """
        return self.extractor.extract_entities(doc) if self.extractor else []

    def parse(self, text):
        """Run the full pipeline on raw text.

        :param text: text to parse
        :return: dict with the keys ``text``, ``intent``, ``entities`` and
            ``confidence``
        """
        parsed = self.nlp(text)
        intent_name, confidence = self.get_intent(parsed)
        found_entities = self.get_entities(parsed)

        if self.ent_synonyms:
            Interpreter.replace_synonyms(found_entities, self.ent_synonyms)

        return {
            'text': text,
            'intent': intent_name,
            'entities': found_entities,
            'confidence': confidence,
        }