def __init__(self, entity_extractor=None, intent_classifier=None, language_name='en', **kwargs):
    """Load the spaCy pipeline and any persisted interpreter components.

    :param entity_extractor: path to a persisted entity-extractor model, or None
    :param intent_classifier: path to a cloudpickled intent classifier, or None
    :param language_name: spaCy model/language to load (default ``'en'``)
    """
    # Parser, built-in NER and matcher are disabled — only tokenization and
    # word vectors are needed for featurization.
    self.nlp = spacy.load(language_name, parser=False, entity=False, matcher=False)
    self.featurizer = SpacyFeaturizer(self.nlp)
    self.classifier = None
    # Bug fix: `intent_classifier` defaults to None but was previously passed
    # to open() unconditionally, raising TypeError for the documented default.
    if intent_classifier:
        with open(intent_classifier, 'rb') as f:
            self.classifier = cloudpickle.load(f)
    # NOTE(review): extractor is constructed even when entity_extractor is
    # None — presumably SpacyEntityExtractor tolerates that; confirm.
    self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)
def test_spacy_featurizer(spacy_nlp_en, sentence, expected):
    """The featurizer's output must equal spaCy's own document vector."""
    from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

    featurizer = SpacyFeaturizer(spacy_nlp_en)
    document = spacy_nlp_en(sentence)
    features = featurizer.features_for_doc(document)

    # Spot-check the first five vector components against the fixture, then
    # require the featurizer to reproduce the full document vector.
    assert np.allclose(document.vector[:5], expected, atol=1e-5)
    assert np.allclose(features, document.vector, atol=1e-5)
def test_spacy_featurizer(sentence, expected, spacy_nlp):
    """The featurizer's output must equal spaCy's own document vector."""
    from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

    featurizer = SpacyFeaturizer()
    document = spacy_nlp(sentence)
    features = featurizer.features_for_doc(document)

    # First five components match the expected fixture values; the featurizer
    # output matches the full vector.
    assert np.allclose(document.vector[:5], expected, atol=1e-5)
    assert np.allclose(features, document.vector, atol=1e-5)
def test_spacy_featurizer(sentence, language, expected):
    """The featurizer's output must equal spaCy's own document vector."""
    import spacy
    from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

    # Tagger and parser are not needed to obtain document vectors.
    pipeline = spacy.load(language, tagger=False, parser=False)
    featurizer = SpacyFeaturizer(pipeline)
    document = pipeline(sentence)
    features = featurizer.features_for_doc(document)

    assert np.allclose(document.vector[:5], expected, atol=1e-5)
    assert np.allclose(features, document.vector, atol=1e-5)
def test_sentence(sentence, language, _ref):
    """Bag-of-words featurization of a sentence matches its doc vector."""
    import spacy
    from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

    pipeline = spacy.load(language, tagger=False, parser=False)
    featurizer = SpacyFeaturizer(pipeline)
    document = pipeline(sentence)
    # create_bow_vecs takes a batch of texts; we featurize a single sentence.
    features = featurizer.create_bow_vecs([sentence])

    assert np.allclose(document.vector[:5], _ref, atol=1e-5)
    assert np.allclose(features[0], document.vector, atol=1e-5)
def __init__(self, config, language_name):
    """Prepare a spacy_sklearn trainer skeleton for *language_name*.

    Classifier and extractor slots start empty and are filled later.
    """
    # Reject unsupported languages before doing any expensive loading.
    self.ensure_language_support(language_name)
    self.name = "spacy_sklearn"
    self.language_name = language_name
    self.training_data = None
    self.intent_classifier = None
    self.entity_extractor = None
    # Tokenizer and vectors only; parsing and built-in NER are disabled.
    self.nlp = spacy.load(self.language_name, parser=False, entity=False)
    self.featurizer = SpacyFeaturizer(self.nlp)
def __init__(self, intent_classifier=None, entity_extractor=None, entity_synonyms=None, nlp=None):
    """Assemble an interpreter from already-built components.

    :param intent_classifier: trained intent classifier, or None
    :param entity_extractor: trained entity extractor, or None
    :param entity_synonyms: synonym mapping, or None
    :param nlp: loaded spaCy language pipeline
    """
    self.nlp = nlp
    self.classifier = intent_classifier
    self.extractor = entity_extractor
    self.ent_synonyms = entity_synonyms
    self.featurizer = SpacyFeaturizer(nlp)
    # Validate the supplied spaCy model after wiring everything up.
    ensure_proper_language_model(nlp)
class SpacySklearnInterpreter(Interpreter):
    """Interpreter backed by spaCy featurization and a pickled sklearn
    intent classifier plus a spaCy-based entity extractor."""

    def __init__(self, entity_extractor=None, intent_classifier=None, language_name='en', **kwargs):
        """Load the spaCy pipeline and the persisted components.

        :param entity_extractor: path to a persisted entity extractor, or None
        :param intent_classifier: path to a cloudpickled classifier, or None
        :param language_name: spaCy model/language to load (default ``'en'``)
        """
        # Only tokenization and vectors are needed for featurization.
        self.nlp = spacy.load(language_name, parser=False, entity=False, matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        self.classifier = None
        # Bug fix: `intent_classifier` defaults to None but was previously
        # passed to open() unconditionally (TypeError on the default).
        if intent_classifier:
            with open(intent_classifier, 'rb') as f:
                self.classifier = cloudpickle.load(f)
        # NOTE(review): constructed even when entity_extractor is None —
        # presumably SpacyEntityExtractor tolerates that; confirm.
        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)

    def get_intent(self, text):
        """Returns the most likely intent and its probability for the input text.

        :param text: text to classify
        :return: tuple of most likely intent name and its probability"""
        if self.classifier is None:
            # No classifier loaded: report a null intent instead of crashing.
            return "None", 0.0
        X = self.featurizer.create_bow_vecs([text])
        intent_ids, probabilities = self.classifier.predict(X)
        intents = self.classifier.transform_labels_num2str(intent_ids)
        return intents[0], probabilities[0]

    def parse(self, text):
        """Parse the input text, classify it and return an object containing
        its intent and entities."""
        intent, probability = self.get_intent(text)
        entities = self.extractor.extract_entities(self.nlp, text)
        return {'text': text,
                'intent': intent,
                'entities': entities,
                'confidence': probability}
def load(meta, nlp, featurizer=None):
    """Create an interpreter from persisted model metadata.

    Each component (entity extractor, intent classifier, synonyms) is
    optional: only the pieces with a persisted path are loaded.

    :type meta: rasa_nlu.model.Metadata
    :type nlp: spacy.language.Language
    :type featurizer: None or rasa_nlu.featurizers.spacy_featurizer.SpacyFeaturizer
    :rtype: SpacySklearnInterpreter
    """
    if meta.entity_extractor_path:
        # Fine-tuning flag is read from the persisted metadata, if present.
        extractor = SpacyEntityExtractor(nlp, meta.entity_extractor_path,
                                         meta.metadata.get("should_fine_tune_spacy_ner"))
    else:
        extractor = None
    if meta.intent_classifier_path:
        with open(meta.intent_classifier_path, 'rb') as f:
            classifier = cloudpickle.load(f)
    else:
        classifier = None
    if meta.entity_synonyms_path:
        entity_synonyms = Interpreter.load_synonyms(meta.entity_synonyms_path)
    else:
        entity_synonyms = None
    if featurizer is None:
        # Build a default featurizer from the supplied pipeline when the
        # caller did not provide one.
        featurizer = SpacyFeaturizer(nlp)
    return SpacySklearnInterpreter(
        classifier, extractor, entity_synonyms, featurizer, nlp)
class SpacySklearnInterpreter(Interpreter):
    """Interpreter combining spaCy featurization, a pickled intent
    classifier and a spaCy-based entity extractor."""

    def __init__(self, entity_extractor=None, intent_classifier=None, language_name='en', **kwargs):
        """Load the spaCy pipeline and the persisted components.

        :param entity_extractor: path to a persisted entity extractor, or None
        :param intent_classifier: path to a cloudpickled classifier, or None
        :param language_name: spaCy model/language to load (default ``'en'``)
        """
        self.nlp = spacy.load(language_name, parser=False, entity=False, matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        self.classifier = None
        # Bug fix: `intent_classifier` defaults to None but was previously
        # passed to open() unconditionally (TypeError on the default).
        if intent_classifier:
            with open(intent_classifier, 'rb') as f:
                self.classifier = cloudpickle.load(f)
        # NOTE(review): constructed even when entity_extractor is None —
        # presumably SpacyEntityExtractor tolerates that; confirm.
        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)

    def get_intent(self, text):
        """Return the most likely intent name for *text* (None if no
        classifier was loaded)."""
        if self.classifier is None:
            return None
        X = self.featurizer.create_bow_vecs([text])
        return self.classifier.predict(X)[0]

    def parse(self, text):
        """Classify *text* and return a dict with its intent and entities."""
        intent = self.get_intent(text)
        entities = self.extractor.extract_entities(self.nlp, text)
        return {'text': text, 'intent': intent, 'entities': entities}
class SpacySklearnInterpreter(Interpreter):
    # Interpreter built on a spaCy pipeline plus optional persisted
    # sklearn intent classifier, entity extractor and synonym map.

    def __init__(self, entity_extractor=None, entity_synonyms=None, intent_classifier=None, language_name='en', **kwargs):
        # Components default to None and are only loaded when a path is given.
        self.extractor = None
        self.classifier = None
        self.ent_synonyms = None
        # Only tokenization/vectors are needed; parser, NER and matcher off.
        self.nlp = spacy.load(language_name, parser=False, entity=False, matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        ensure_proper_language_model(self.nlp)
        if intent_classifier:
            with open(intent_classifier, 'rb') as f:
                self.classifier = cloudpickle.load(f)
        if entity_extractor:
            self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)
        # NOTE(review): called even when entity_synonyms is None —
        # presumably load_synonyms tolerates that; confirm.
        self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_intent(self, doc):
        """Returns the most likely intent and its probability for the input text.

        :param doc: spaCy document of the text to classify
        :return: tuple of most likely intent name and its probability"""
        if self.classifier:
            X = self.featurizer.features_for_doc(doc).reshape(1, -1)
            intent_ids, probabilities = self.classifier.predict(X)
            intents = self.classifier.transform_labels_num2str(intent_ids)
            intent, score = intents[0], probabilities[0]
        else:
            # No trained classifier available: report a null intent.
            intent, score = "None", 0.0
        return intent, score

    def get_entities(self, doc):
        # Entity extraction is optional; without an extractor there are none.
        if self.extractor:
            return self.extractor.extract_entities(doc)
        return []

    def parse(self, text):
        """Parse the input text, classify it and return an object containing its intent and entities."""
        doc = self.nlp(text)
        intent, probability = self.get_intent(doc)
        entities = self.get_entities(doc)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': probability
        }
class SpacySklearnTrainer(Trainer):
    # Trainer coupling spaCy featurization with an sklearn intent
    # classifier and a spaCy-based entity extractor.

    # Language codes accepted by ensure_language_support.
    SUPPORTED_LANGUAGES = {"en", "de"}

    def __init__(self, config, language_name):
        self.ensure_language_support(language_name)
        self.name = "spacy_sklearn"
        self.language_name = language_name
        self.training_data = None
        # Tokenizer and vectors only; parsing and built-in NER are disabled.
        self.nlp = spacy.load(self.language_name, parser=False, entity=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        self.intent_classifier = SklearnIntentClassifier()
        self.entity_extractor = SpacyEntityExtractor()

    def train(self, data, test_split_size=0.1):
        """Train both sub-models on the given training data."""
        self.training_data = data
        self.train_entity_extractor(data.entity_examples)
        self.train_intent_classifier(data.intent_examples, test_split_size)

    def train_entity_extractor(self, entity_examples):
        self.entity_extractor.train(self.nlp, entity_examples)

    def train_intent_classifier(self, intent_examples, test_split_size=0.1):
        labels = [e["intent"] for e in intent_examples]
        sentences = [e["text"] for e in intent_examples]
        # sklearn needs numeric labels; the classifier keeps the mapping
        # so predictions can be converted back to strings.
        y = self.intent_classifier.transform_labels_str2num(labels)
        X = self.featurizer.create_bow_vecs(sentences)
        self.intent_classifier.train(X, y, test_split_size)

    def persist(self, path, persistor=None, create_unique_subfolder=True):
        """Write training data, classifier and NER model under *path*, and
        optionally upload the result via *persistor*."""
        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        if create_unique_subfolder:
            # Timestamped subfolder keeps successive models side by side.
            dir_name = os.path.join(path, "model_" + timestamp)
            os.mkdir(dir_name)
        else:
            dir_name = path
        data_file = os.path.join(dir_name, "training_data.json")
        classifier_file = os.path.join(dir_name, "intent_classifier.pkl")
        ner_dir = os.path.join(dir_name, 'ner')
        if not os.path.exists(ner_dir):
            os.mkdir(ner_dir)
        entity_extractor_config_file = os.path.join(ner_dir, "config.json")
        entity_extractor_file = os.path.join(ner_dir, "model")
        # Metadata is written first so the model directory is self-describing.
        write_training_metadata(dir_name, timestamp, data_file, self.name, self.language_name, classifier_file, ner_dir)
        with open(data_file, 'w') as f:
            f.write(self.training_data.as_json(indent=2))
        with open(classifier_file, 'wb') as f:
            cloudpickle.dump(self.intent_classifier, f)
        with open(entity_extractor_config_file, 'w') as f:
            json.dump(self.entity_extractor.ner.cfg, f)
        self.entity_extractor.ner.model.dump(entity_extractor_file)
        if persistor is not None:
            persistor.send_tar_to_s3(dir_name)
def __init__(self, language_name, max_num_threads=1, should_fine_tune_spacy_ner=False):
    # NOTE(review): super(self.__class__, ...) recurses infinitely if this
    # class is ever subclassed — prefer naming the class explicitly.
    super(self.__class__, self).__init__(language_name, max_num_threads)
    self.should_fine_tune_spacy_ner = should_fine_tune_spacy_ner
    # The loader decides how to prepare the model when NER fine-tuning
    # is requested.
    self.nlp = self._load_nlp_model(language_name, should_fine_tune_spacy_ner)
    self.featurizer = SpacyFeaturizer(self.nlp)
    # Validate the loaded spaCy model before it is used.
    ensure_proper_language_model(self.nlp)
def __init__(self, entity_extractor=None, entity_synonyms=None, intent_classifier=None, language_name='en', **kwargs):
    """Load the spaCy pipeline and any persisted interpreter components.

    :param entity_extractor: path to a persisted entity extractor, or None
    :param entity_synonyms: path to a persisted synonyms file, or None
    :param intent_classifier: path to a cloudpickled classifier, or None
    :param language_name: spaCy model/language to load (default ``'en'``)
    """
    # Optional components start out empty.
    self.classifier = None
    self.extractor = None
    self.ent_synonyms = None
    # Parser, built-in NER and matcher are not needed for featurization.
    self.nlp = spacy.load(language_name, parser=False, entity=False, matcher=False)
    self.featurizer = SpacyFeaturizer(self.nlp)
    ensure_proper_language_model(self.nlp)
    if intent_classifier:
        with open(intent_classifier, 'rb') as persisted:
            self.classifier = cloudpickle.load(persisted)
    if entity_extractor:
        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)
    # NOTE(review): called even when entity_synonyms is None — presumably
    # load_synonyms tolerates that; confirm.
    self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)
def __init__(self, language_name, max_num_threads=1):
    # NOTE(review): super(self.__class__, ...) recurses infinitely if this
    # class is ever subclassed — prefer naming the class explicitly.
    super(self.__class__, self).__init__("spacy_sklearn", language_name, max_num_threads)
    # Tokenizer and vectors only; parsing and built-in NER are disabled.
    self.nlp = spacy.load(self.language_name, parser=False, entity=False)
    self.featurizer = SpacyFeaturizer(self.nlp)
    # Validate the loaded spaCy model before it is used.
    ensure_proper_language_model(self.nlp)
class SpacySklearnInterpreter(Interpreter):
    # Interpreter built from persisted metadata: spaCy pipeline plus
    # optional sklearn intent classifier, entity extractor and synonyms.

    @staticmethod
    def load(meta, nlp):
        """Create an interpreter from persisted model metadata.

        Each component is optional: only pieces with a persisted path
        are loaded.

        :type meta: ModelMetadata
        :rtype: SpacySklearnInterpreter
        """
        if meta.entity_extractor_path:
            extractor = SpacyEntityExtractor(nlp, meta.entity_extractor_path)
        else:
            extractor = None
        if meta.intent_classifier_path:
            with open(meta.intent_classifier_path, 'rb') as f:
                classifier = cloudpickle.load(f)
        else:
            classifier = None
        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(
                meta.entity_synonyms_path)
        else:
            entity_synonyms = None
        return SpacySklearnInterpreter(classifier, extractor, entity_synonyms, nlp)

    def __init__(self, intent_classifier=None, entity_extractor=None, entity_synonyms=None, nlp=None):
        # Components are supplied ready-made (see load()); no file I/O here.
        self.extractor = entity_extractor
        self.classifier = intent_classifier
        self.ent_synonyms = entity_synonyms
        self.nlp = nlp
        self.featurizer = SpacyFeaturizer(nlp)
        ensure_proper_language_model(nlp)

    def get_intent(self, doc):
        """Returns the most likely intent and its probability for the input text.

        :param doc: spaCy document of the text to classify
        :return: tuple of most likely intent name and its probability"""
        if self.classifier:
            X = self.featurizer.features_for_doc(doc).reshape(1, -1)
            intent_ids, probabilities = self.classifier.predict(X)
            intents = self.classifier.transform_labels_num2str(intent_ids)
            intent, score = intents[0], probabilities[0]
        else:
            # No trained classifier available: report a null intent.
            intent, score = "None", 0.0
        return intent, score

    def get_entities(self, doc):
        # Entity extraction is optional; without an extractor there are none.
        if self.extractor:
            return self.extractor.extract_entities(doc)
        return []

    def parse(self, text):
        """Parse the input text, classify it and return an object containing its intent and entities."""
        doc = self.nlp(text)
        intent, probability = self.get_intent(doc)
        entities = self.get_entities(doc)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': probability
        }
from rasa_nlu.train import load_data from rasa_nlu.config import RasaNLUModelConfig from rasa_nlu.utils.spacy_utils import SpacyNLP from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer import numpy as np, spacy training_data = load_data("data/examples/rasa/demo-rasa.json") config = RasaNLUModelConfig() SpacyNLP(nlp=spacy.load("en")).train(training_data, config) SpacyTokenizer().train(training_data, config) SpacyFeaturizer().train(training_data, config) from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC labels = [e.get("intent") for e in training_data.intent_examples] le = LabelEncoder() y = le.fit_transform(labels) X = np.stack([ example.get("text_features") for example in training_data.intent_examples ]) defaults = { # C parameter of the svm - cross validation will select the best value "C": [1, 2, 5, 10, 20, 100], # the kernels to use for the svm training - cross validation will # decide which one of them performs best