def __init__(self, entity_extractor=None, intent_classifier=None, language_name='en', **kwargs):
        """Load the spacy pipeline, the pickled intent classifier and the entity extractor.

        :param entity_extractor: handed to SpacyEntityExtractor (presumably a path — verify)
        :param intent_classifier: path to a cloudpickle'd classifier file
        :param language_name: spacy model name to load"""
        self.nlp = spacy.load(language_name,
                              parser=False,
                              entity=False,
                              matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        with open(intent_classifier, 'rb') as stored_classifier:
            self.classifier = cloudpickle.load(stored_classifier)
        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)
Exemple #2
0
def test_spacy_featurizer(spacy_nlp_en, sentence, expected):
    from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

    featurizer = SpacyFeaturizer(spacy_nlp_en)
    doc = spacy_nlp_en(sentence)
    computed = featurizer.features_for_doc(doc)
    # The leading dimensions of the doc vector must match the reference values,
    # and the featurizer output must reproduce the full doc vector.
    assert np.allclose(doc.vector[:5], expected, atol=1e-5)
    assert np.allclose(computed, doc.vector, atol=1e-5)
def test_spacy_featurizer(sentence, expected, spacy_nlp):
    from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

    doc = spacy_nlp(sentence)
    featurizer = SpacyFeaturizer()
    computed = featurizer.features_for_doc(doc)
    # Featurizer output should equal the doc vector; the reference values
    # pin down the first few dimensions of that vector.
    assert np.allclose(doc.vector[:5], expected, atol=1e-5)
    assert np.allclose(computed, doc.vector, atol=1e-5)
Exemple #4
0
def test_spacy_featurizer(sentence, language, expected):
    import spacy
    from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

    nlp = spacy.load(language, tagger=False, parser=False)
    doc = nlp(sentence)
    computed = SpacyFeaturizer(nlp).features_for_doc(doc)
    # First dimensions of the doc vector match the reference, and the
    # featurizer reproduces the full doc vector.
    assert np.allclose(doc.vector[:5], expected, atol=1e-5)
    assert np.allclose(computed, doc.vector, atol=1e-5)
Exemple #5
0
 def test_sentence(sentence, language, _ref):
     import spacy
     from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer

     nlp = spacy.load(language, tagger=False, parser=False)
     doc = nlp(sentence)
     featurizer = SpacyFeaturizer(nlp)
     bow = featurizer.create_bow_vecs([sentence])
     # The reference pins the first dimensions of the doc vector; the bow
     # vector for the single sentence should equal that full vector.
     assert np.allclose(doc.vector[:5], _ref, atol=1e-5)
     assert np.allclose(bow[0], doc.vector, atol=1e-5)
 def __init__(self, config, language_name):
     """Prepare the spacy_sklearn trainer for *language_name*.

     ``config`` is accepted for interface compatibility but not read here."""
     self.ensure_language_support(language_name)
     self.name = "spacy_sklearn"
     self.training_data = None
     self.language_name = language_name
     self.nlp = spacy.load(self.language_name, parser=False, entity=False)
     self.featurizer = SpacyFeaturizer(self.nlp)
     # No classifier/extractor at construction time.
     self.intent_classifier = None
     self.entity_extractor = None
Exemple #7
0
    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 entity_synonyms=None,
                 nlp=None):
        """Store the pre-built components and wrap the spacy pipeline in a featurizer.

        :param nlp: loaded spacy language pipeline"""
        self.classifier = intent_classifier
        self.extractor = entity_extractor
        self.ent_synonyms = entity_synonyms
        self.nlp = nlp
        self.featurizer = SpacyFeaturizer(nlp)
        ensure_proper_language_model(nlp)
class SpacySklearnInterpreter(Interpreter):
    """Interpreter that featurizes text with spacy and classifies intents
    with a pickled sklearn model."""

    def __init__(self, entity_extractor=None, intent_classifier=None, language_name='en', **kwargs):
        """Load the spacy model, the pickled intent classifier and the entity extractor.

        :param entity_extractor: handed to SpacyEntityExtractor (presumably a path — verify)
        :param intent_classifier: path to a cloudpickle'd classifier file
        :param language_name: spacy model name to load"""
        self.nlp = spacy.load(language_name, parser=False, entity=False, matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        with open(intent_classifier, 'rb') as f:
            self.classifier = cloudpickle.load(f)

        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)

    def get_intent(self, text):
        """Returns the most likely intent and its probability for the input text.

        :param text: text to classify
        :return: tuple of most likely intent name and its probability"""

        X = self.featurizer.create_bow_vecs([text])
        intent_ids, probabilities = self.classifier.predict(X)
        intents = self.classifier.transform_labels_num2str(intent_ids)
        return intents[0], probabilities[0]

    def parse(self, text):
        """Parse the input text, classify it and return an object containing its intent and entities."""

        intent, probability = self.get_intent(text)
        entities = self.extractor.extract_entities(self.nlp, text)

        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': probability}

    # NOTE(review): `load` has no @staticmethod decorator, so `meta` would
    # bind to the instance if called on one — confirm it is only ever called
    # on the class itself.
    def load(meta, nlp, featurizer=None):
        """
        :type meta: rasa_nlu.model.Metadata
        :type nlp: spacy.language.Language
        :type featurizer: None or rasa_nlu.featurizers.spacy_featurizer.SpacyFeaturizer
        :rtype: SpacySklearnInterpreter
        """
        if meta.entity_extractor_path:
            extractor = SpacyEntityExtractor(nlp, meta.entity_extractor_path,
                                             meta.metadata.get("should_fine_tune_spacy_ner"))
        else:
            extractor = None
        if meta.intent_classifier_path:
            with open(meta.intent_classifier_path, 'rb') as f:
                classifier = cloudpickle.load(f)
        else:
            classifier = None
        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(meta.entity_synonyms_path)
        else:
            entity_synonyms = None

        if featurizer is None:
            featurizer = SpacyFeaturizer(nlp)
        # NOTE(review): this passes five positional args, but the __init__
        # above takes (entity_extractor, intent_classifier, language_name,
        # **kwargs) — it appears to target a different constructor version
        # (cf. the 4-parameter __init__ later in this file); verify.
        return SpacySklearnInterpreter(
            classifier,
            extractor,
            entity_synonyms,
            featurizer,
            nlp)
class SpacySklearnInterpreter(Interpreter):
    """Minimal interpreter: spacy pipeline plus a pickled sklearn intent classifier."""

    def __init__(self,
                 entity_extractor=None,
                 intent_classifier=None,
                 language_name='en',
                 **kwargs):
        """Load the spacy model, the pickled classifier and the entity extractor."""
        self.nlp = spacy.load(language_name,
                              parser=False,
                              entity=False,
                              matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        with open(intent_classifier, 'rb') as stored_classifier:
            self.classifier = cloudpickle.load(stored_classifier)
        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)

    def get_intent(self, text):
        """Return the predicted intent label for *text*."""
        bow = self.featurizer.create_bow_vecs([text])
        return self.classifier.predict(bow)[0]

    def parse(self, text):
        """Classify *text* and extract its entities."""
        return {
            'text': text,
            'intent': self.get_intent(text),
            'entities': self.extractor.extract_entities(self.nlp, text),
        }
class SpacySklearnInterpreter(Interpreter):
    """Interpreter combining a spacy pipeline with optionally loaded persisted
    components (intent classifier, entity extractor, synonym table)."""

    def __init__(self,
                 entity_extractor=None,
                 entity_synonyms=None,
                 intent_classifier=None,
                 language_name='en',
                 **kwargs):
        """Load the spacy model and, where paths are provided, the persisted
        components; missing components stay ``None``.

        :param entity_extractor: handed to SpacyEntityExtractor (presumably a path — verify)
        :param entity_synonyms: handed to Interpreter.load_synonyms
        :param intent_classifier: path to a cloudpickle'd classifier, or None
        :param language_name: spacy model name to load"""
        self.extractor = None
        self.classifier = None
        self.ent_synonyms = None
        self.nlp = spacy.load(language_name,
                              parser=False,
                              entity=False,
                              matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)

        ensure_proper_language_model(self.nlp)

        if intent_classifier:
            with open(intent_classifier, 'rb') as f:
                self.classifier = cloudpickle.load(f)
        if entity_extractor:
            self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)
        self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_intent(self, doc):
        """Returns the most likely intent and its score for the processed doc.

        :param doc: spacy document to classify
        :return: tuple of most likely intent name and its score;
                 ("None", 0.0) when no classifier is loaded"""
        if self.classifier:
            X = self.featurizer.features_for_doc(doc).reshape(1, -1)
            intent_ids, probabilities = self.classifier.predict(X)
            intents = self.classifier.transform_labels_num2str(intent_ids)
            intent, score = intents[0], probabilities[0]
        else:
            intent, score = "None", 0.0

        return intent, score

    def get_entities(self, doc):
        # Entity extraction is optional: without an extractor, report none.
        if self.extractor:
            return self.extractor.extract_entities(doc)
        return []

    def parse(self, text):
        """Parse the input text, classify it and return an object containing its intent and entities."""
        doc = self.nlp(text)
        intent, probability = self.get_intent(doc)
        entities = self.get_entities(doc)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': probability
        }
class SpacySklearnTrainer(Trainer):
    """Trainer wiring a spacy pipeline to a sklearn intent classifier and a
    spacy-based entity extractor, with persistence to disk."""

    SUPPORTED_LANGUAGES = {"en", "de"}

    def __init__(self, config, language_name):
        """Load the spacy model for *language_name* and create the component
        objects. ``config`` is accepted but not read here."""
        self.ensure_language_support(language_name)
        self.name = "spacy_sklearn"
        self.language_name = language_name
        self.training_data = None
        self.nlp = spacy.load(self.language_name, parser=False, entity=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        self.intent_classifier = SklearnIntentClassifier()
        self.entity_extractor = SpacyEntityExtractor()

    def train(self, data, test_split_size=0.1):
        """Train entity extractor and intent classifier on *data*."""
        self.training_data = data
        self.train_entity_extractor(data.entity_examples)
        self.train_intent_classifier(data.intent_examples, test_split_size)

    def train_entity_extractor(self, entity_examples):
        """Fit the entity extractor on the entity-annotated examples."""
        self.entity_extractor.train(self.nlp, entity_examples)

    def train_intent_classifier(self, intent_examples, test_split_size=0.1):
        """Featurize the example texts and fit the intent classifier.

        :param test_split_size: forwarded to the classifier's train method"""
        labels = [e["intent"] for e in intent_examples]
        sentences = [e["text"] for e in intent_examples]
        y = self.intent_classifier.transform_labels_str2num(labels)
        X = self.featurizer.create_bow_vecs(sentences)
        self.intent_classifier.train(X, y, test_split_size)

    def persist(self, path, persistor=None, create_unique_subfolder=True):
        """Write training data, the pickled classifier and the NER model under *path*.

        :param persistor: when given, the result directory is sent to S3
        :param create_unique_subfolder: create a timestamped model_* subfolder"""
        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

        if create_unique_subfolder:
            dir_name = os.path.join(path, "model_" + timestamp)
            os.mkdir(dir_name)
        else:
            dir_name = path

        data_file = os.path.join(dir_name, "training_data.json")
        classifier_file = os.path.join(dir_name, "intent_classifier.pkl")
        ner_dir = os.path.join(dir_name, 'ner')
        if not os.path.exists(ner_dir):
            os.mkdir(ner_dir)
        entity_extractor_config_file = os.path.join(ner_dir, "config.json")
        entity_extractor_file = os.path.join(ner_dir, "model")

        # Metadata file describing where each artifact lives.
        write_training_metadata(dir_name, timestamp, data_file, self.name,
                                self.language_name, classifier_file, ner_dir)

        with open(data_file, 'w') as f:
            f.write(self.training_data.as_json(indent=2))
        with open(classifier_file, 'wb') as f:
            cloudpickle.dump(self.intent_classifier, f)
        with open(entity_extractor_config_file, 'w') as f:
            json.dump(self.entity_extractor.ner.cfg, f)

        self.entity_extractor.ner.model.dump(entity_extractor_file)

        if persistor is not None:
            persistor.send_tar_to_s3(dir_name)
Exemple #13
0
 def __init__(self,
              language_name,
              max_num_threads=1,
              should_fine_tune_spacy_ner=False):
     """Initialise the trainer: load the nlp model and wrap it in a featurizer.

     :param should_fine_tune_spacy_ner: forwarded to the nlp model loader"""
     # NOTE(review): super(self.__class__, ...) recurses infinitely if this
     # class is ever subclassed — confirm the hierarchy stays flat, or name
     # the parent class explicitly.
     super(self.__class__, self).__init__(language_name, max_num_threads)
     self.should_fine_tune_spacy_ner = should_fine_tune_spacy_ner
     self.nlp = self._load_nlp_model(language_name,
                                     should_fine_tune_spacy_ner)
     self.featurizer = SpacyFeaturizer(self.nlp)
     ensure_proper_language_model(self.nlp)
Exemple #14
0
    def __init__(self,
                 entity_extractor=None,
                 entity_synonyms=None,
                 intent_classifier=None,
                 language_name='en',
                 **kwargs):
        """Load the spacy model and, where arguments are provided, the
        persisted components; missing ones stay ``None``."""
        self.extractor = None
        self.classifier = None
        self.ent_synonyms = None
        self.nlp = spacy.load(language_name,
                              parser=False,
                              entity=False,
                              matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        ensure_proper_language_model(self.nlp)

        if intent_classifier:
            with open(intent_classifier, 'rb') as stored_classifier:
                self.classifier = cloudpickle.load(stored_classifier)
        if entity_extractor:
            self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)
        self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)
Exemple #15
0
 def __init__(self, language_name, max_num_threads=1):
     """Initialise the spacy_sklearn backend: load the spacy model (without
     parser and entity components) and build a featurizer on top of it."""
     # NOTE(review): super(self.__class__, ...) recurses infinitely under
     # subclassing — confirm this class is never subclassed.
     super(self.__class__, self).__init__("spacy_sklearn", language_name,
                                          max_num_threads)
     self.nlp = spacy.load(self.language_name, parser=False, entity=False)
     self.featurizer = SpacyFeaturizer(self.nlp)
     ensure_proper_language_model(self.nlp)
Exemple #16
0
class SpacySklearnInterpreter(Interpreter):
    """Interpreter using spacy for featurization and entity extraction and a
    pickled sklearn model for intent classification."""

    @staticmethod
    def load(meta, nlp):
        """Construct an interpreter from persisted model metadata.

        :type meta: ModelMetadata
        :rtype: SpacySklearnInterpreter
        """
        if meta.entity_extractor_path:
            extractor = SpacyEntityExtractor(nlp, meta.entity_extractor_path)
        else:
            extractor = None
        if meta.intent_classifier_path:
            with open(meta.intent_classifier_path, 'rb') as f:
                classifier = cloudpickle.load(f)
        else:
            classifier = None
        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(
                meta.entity_synonyms_path)
        else:
            entity_synonyms = None
        return SpacySklearnInterpreter(classifier, extractor, entity_synonyms,
                                       nlp)

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 entity_synonyms=None,
                 nlp=None):
        """Store the already-constructed components and wrap the spacy
        pipeline in a featurizer.

        :param nlp: loaded spacy language pipeline"""
        self.extractor = entity_extractor
        self.classifier = intent_classifier
        self.ent_synonyms = entity_synonyms
        self.nlp = nlp
        self.featurizer = SpacyFeaturizer(nlp)

        ensure_proper_language_model(nlp)

    def get_intent(self, doc):
        """Returns the most likely intent and its score for the processed doc.

        :param doc: spacy document to classify
        :return: tuple of most likely intent name and its score;
                 ("None", 0.0) when no classifier is loaded"""
        if self.classifier:
            X = self.featurizer.features_for_doc(doc).reshape(1, -1)
            intent_ids, probabilities = self.classifier.predict(X)
            intents = self.classifier.transform_labels_num2str(intent_ids)
            intent, score = intents[0], probabilities[0]
        else:
            intent, score = "None", 0.0

        return intent, score

    def get_entities(self, doc):
        # Entity extraction is optional: without an extractor, report none.
        if self.extractor:
            return self.extractor.extract_entities(doc)
        return []

    def parse(self, text):
        """Parse the input text, classify it and return an object containing its intent and entities."""
        doc = self.nlp(text)
        intent, probability = self.get_intent(doc)
        entities = self.get_entities(doc)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': probability
        }
Exemple #17
0
from rasa_nlu.train import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.utils.spacy_utils import SpacyNLP
from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer
import numpy as np, spacy

# Load the demo training data and run the three pipeline components in
# sequence; each .train() call mutates training_data in place (the
# intent examples presumably gain "text_features" — verify against
# SpacyFeaturizer.train).
training_data = load_data("data/examples/rasa/demo-rasa.json")
config = RasaNLUModelConfig()
SpacyNLP(nlp=spacy.load("en")).train(training_data, config)
SpacyTokenizer().train(training_data, config)
SpacyFeaturizer().train(training_data, config)

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Encode intent labels as integers and stack the per-example feature
# vectors into the design matrix for the SVM grid search below.
labels = [e.get("intent") for e in training_data.intent_examples]
le = LabelEncoder()

y = le.fit_transform(labels)
X = np.stack([
    example.get("text_features") for example in training_data.intent_examples
])

defaults = {
    # C parameter of the svm - cross validation will select the best value
    "C": [1, 2, 5, 10, 20, 100],

    # the kernels to use for the svm training - cross validation will
    # decide which one of them performs best