class SpacySklearnInterpreter(Interpreter):
    def __init__(self,
                 entity_extractor=None,
                 intent_classifier=None,
                 language_name='en',
                 **kwargs):
        self.nlp = spacy.load(language_name,
                              parser=False,
                              entity=False,
                              matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        with open(intent_classifier, 'rb') as f:
            self.classifier = cloudpickle.load(f)

        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)

    def get_intent(self, text):
        X = self.featurizer.create_bow_vecs([text])
        return self.classifier.predict(X)[0]

    def parse(self, text):
        intent = self.get_intent(text)
        entities = self.extractor.extract_entities(self.nlp, text)

        return {'text': text, 'intent': intent, 'entities': entities}
class SpacySklearnInterpreter(Interpreter):
    def __init__(self, entity_extractor=None, intent_classifier=None, language_name='en', **kwargs):
        self.nlp = spacy.load(language_name, parser=False, entity=False, matcher=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        with open(intent_classifier, 'rb') as f:
            self.classifier = cloudpickle.load(f)

        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)

    def get_intent(self, text):
        """Returns the most likely intent and its probability for the input text.

        :param text: text to classify
        :return: tuple of most likely intent name and its probability"""

        X = self.featurizer.create_bow_vecs([text])
        intent_ids, probabilities = self.classifier.predict(X)
        intents = self.classifier.transform_labels_num2str(intent_ids)
        return intents[0], probabilities[0]

    def parse(self, text):
        """Parse the input text, classify it and return an object containing its intent and entities."""

        intent, probability = self.get_intent(text)
        entities = self.extractor.extract_entities(self.nlp, text)

        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': probability}
class SpacySklearnTrainer(Trainer):
    SUPPORTED_LANGUAGES = {"en", "de"}

    def __init__(self, config, language_name):
        self.ensure_language_support(language_name)
        self.name = "spacy_sklearn"
        self.language_name = language_name
        self.training_data = None
        self.nlp = spacy.load(self.language_name, parser=False, entity=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        self.intent_classifier = SklearnIntentClassifier()
        self.entity_extractor = SpacyEntityExtractor()

    def train(self, data, test_split_size=0.1):
        self.training_data = data
        self.train_entity_extractor(data.entity_examples)
        self.train_intent_classifier(data.intent_examples, test_split_size)

    def train_entity_extractor(self, entity_examples):
        self.entity_extractor.train(self.nlp, entity_examples)

    def train_intent_classifier(self, intent_examples, test_split_size=0.1):
        labels = [e["intent"] for e in intent_examples]
        sentences = [e["text"] for e in intent_examples]
        y = self.intent_classifier.transform_labels_str2num(labels)
        X = self.featurizer.create_bow_vecs(sentences)
        self.intent_classifier.train(X, y, test_split_size)

    def persist(self, path, persistor=None, create_unique_subfolder=True):
        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

        if create_unique_subfolder:
            dir_name = os.path.join(path, "model_" + timestamp)
            os.mkdir(dir_name)
        else:
            dir_name = path

        data_file = os.path.join(dir_name, "training_data.json")
        classifier_file = os.path.join(dir_name, "intent_classifier.pkl")
        ner_dir = os.path.join(dir_name, 'ner')
        if not os.path.exists(ner_dir):
            os.mkdir(ner_dir)
        entity_extractor_config_file = os.path.join(ner_dir, "config.json")
        entity_extractor_file = os.path.join(ner_dir, "model")

        write_training_metadata(dir_name, timestamp, data_file, self.name,
                                self.language_name, classifier_file, ner_dir)

        with open(data_file, 'w') as f:
            f.write(self.training_data.as_json(indent=2))
        with open(classifier_file, 'wb') as f:
            cloudpickle.dump(self.intent_classifier, f)
        with open(entity_extractor_config_file, 'w') as f:
            json.dump(self.entity_extractor.ner.cfg, f)

        self.entity_extractor.ner.model.dump(entity_extractor_file)

        if persistor is not None:
            persistor.send_tar_to_s3(dir_name)
Beispiel #4
0
 def test_sentence(sentence, language, _ref):
     import spacy
     from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer
     nlp = spacy.load(language, tagger=False, parser=False)
     doc = nlp(sentence)
     ftr = SpacyFeaturizer(nlp)
     vecs = ftr.create_bow_vecs([sentence])
     assert np.allclose(doc.vector[:5], _ref, atol=1e-5)
     assert np.allclose(vecs[0], doc.vector, atol=1e-5)