예제 #1
0
    def __init__(self, entity_extractor=None, intent_classifier=None, language_name='en', **kwargs):
        """Load the spaCy pipeline, the pickled intent classifier and the entity extractor.

        :param entity_extractor: forwarded to ``SpacyEntityExtractor`` as its second argument
        :param intent_classifier: path of the pickled classifier; opened in binary mode
        :param language_name: name handed to ``spacy.load``
        :param kwargs: accepted but not read here
        """
        # The parser, entity recogniser and matcher are switched off at load time.
        spacy_options = {'parser': False, 'entity': False, 'matcher': False}
        self.nlp = spacy.load(language_name, **spacy_options)
        self.featurizer = SpacyFeaturizer(self.nlp)

        with open(intent_classifier, 'rb') as pickled_classifier:
            self.classifier = cloudpickle.load(pickled_classifier)

        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)
예제 #2
0
class SpacySklearnTrainer(Trainer):
    """Trainer pairing a spaCy featurizer/entity extractor with a sklearn intent classifier."""

    SUPPORTED_LANGUAGES = {"en", "de"}

    def __init__(self, config, language_name):
        """Prepare the spaCy pipeline plus an untrained classifier and extractor.

        :param config: accepted but not read in this constructor
        :param language_name: passed to ``ensure_language_support`` and ``spacy.load``
        """
        self.ensure_language_support(language_name)

        self.name = "spacy_sklearn"
        self.language_name = language_name
        self.training_data = None

        # spaCy is loaded with its parser and entity recogniser disabled.
        spacy_options = {'parser': False, 'entity': False}
        self.nlp = spacy.load(self.language_name, **spacy_options)
        self.featurizer = SpacyFeaturizer(self.nlp)
        self.intent_classifier = SklearnIntentClassifier()
        self.entity_extractor = SpacyEntityExtractor()

    def train(self, data, test_split_size=0.1):
        """Keep a reference to ``data`` and train the entity extractor, then the intent classifier.

        :param data: object exposing ``entity_examples`` and ``intent_examples``
        :param test_split_size: forwarded to ``train_intent_classifier``
        """
        self.training_data = data
        self.train_entity_extractor(data.entity_examples)
        self.train_intent_classifier(data.intent_examples, test_split_size)

    def train_entity_extractor(self, entity_examples):
        """Train the entity extractor on ``entity_examples`` using the loaded spaCy pipeline."""
        extractor = self.entity_extractor
        extractor.train(self.nlp, entity_examples)

    def train_intent_classifier(self, intent_examples, test_split_size=0.1):
        """Featurize the example texts and fit the intent classifier on them.

        :param intent_examples: iterable of mappings with ``"intent"`` and ``"text"`` keys
        :param test_split_size: forwarded unchanged to the classifier's ``train``
        """
        intent_names = [example["intent"] for example in intent_examples]
        texts = [example["text"] for example in intent_examples]

        encoded_labels = self.intent_classifier.transform_labels_str2num(intent_names)
        features = self.featurizer.create_bow_vecs(texts)
        self.intent_classifier.train(features, encoded_labels, test_split_size)

    def persist(self, path, persistor=None, create_unique_subfolder=True):
        """Write training data, classifier and NER model to disk.

        :param path: base directory; used directly when ``create_unique_subfolder`` is false
        :param persistor: optional object; its ``send_tar_to_s3`` is called with the
            output directory once all files are written
        :param create_unique_subfolder: when true, a ``model_<timestamp>`` directory is
            created under ``path`` and used as the output directory
        """
        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

        # Default to writing straight into ``path``; optionally switch to a
        # freshly created timestamped sub-directory.
        dir_name = path
        if create_unique_subfolder:
            dir_name = os.path.join(path, "model_" + timestamp)
            os.mkdir(dir_name)

        data_file = os.path.join(dir_name, "training_data.json")
        classifier_file = os.path.join(dir_name, "intent_classifier.pkl")

        ner_dir = os.path.join(dir_name, 'ner')
        ner_dir_missing = not os.path.exists(ner_dir)
        if ner_dir_missing:
            os.mkdir(ner_dir)
        ner_config_file = os.path.join(ner_dir, "config.json")
        ner_model_file = os.path.join(ner_dir, "model")

        # Metadata is written before the artefacts it points to.
        write_training_metadata(dir_name, timestamp, data_file, self.name,
                                self.language_name, classifier_file, ner_dir)

        with open(data_file, 'w') as data_out:
            data_out.write(self.training_data.as_json(indent=2))

        with open(classifier_file, 'wb') as classifier_out:
            cloudpickle.dump(self.intent_classifier, classifier_out)

        ner = self.entity_extractor.ner
        with open(ner_config_file, 'w') as config_out:
            json.dump(ner.cfg, config_out)

        self.entity_extractor.ner.model.dump(ner_model_file)

        if persistor is not None:
            persistor.send_tar_to_s3(dir_name)
예제 #3
0
 def __init__(self, config, language_name):
     """Prepare the spaCy pipeline plus an untrained classifier and extractor.

     :param config: accepted but not read in this constructor
     :param language_name: passed to ``ensure_language_support`` and ``spacy.load``
     """
     self.ensure_language_support(language_name)

     self.name = "spacy_sklearn"
     self.language_name = language_name
     self.training_data = None

     # spaCy is loaded with its parser and entity recogniser disabled.
     spacy_options = {'parser': False, 'entity': False}
     self.nlp = spacy.load(self.language_name, **spacy_options)
     self.featurizer = SpacyFeaturizer(self.nlp)
     self.intent_classifier = SklearnIntentClassifier()
     self.entity_extractor = SpacyEntityExtractor()
예제 #4
0
def test_spacy_ner_extractor(spacy_nlp):
    """Processing the message must leave exactly one entity, matching the expected LOC dict."""
    ext = SpacyEntityExtractor()

    message_data = {
        "intent": "restaurant_search",
        "entities": [],
        "spacy_doc": spacy_nlp("anywhere in the west"),
    }
    example = Message("anywhere in the West", message_data)

    ext.process(example, spacy_nlp=spacy_nlp)

    expected_entity = {
        u'start': 16,
        u'extractor': u'ner_spacy',
        u'end': 20,
        u'value': u'West',
        u'entity': u'LOC',
    }
    extracted = example.get("entities", [])
    assert len(extracted) == 1
    assert example.get("entities")[0] == expected_entity
    def load(meta, nlp, featurizer=None):
        """
        Build a SpacySklearnInterpreter from the artefacts referenced by ``meta``.

        Each component whose path on ``meta`` is falsy is passed on as ``None``.

        :type meta: rasa_nlu.model.Metadata
        :type nlp: spacy.language.Language
        :type featurizer: None or rasa_nlu.featurizers.spacy_featurizer.SpacyFeaturizer
        :rtype: SpacySklearnInterpreter
        """
        extractor = None
        if meta.entity_extractor_path:
            fine_tune = meta.metadata.get("should_fine_tune_spacy_ner")
            extractor = SpacyEntityExtractor(nlp, meta.entity_extractor_path, fine_tune)

        classifier = None
        if meta.intent_classifier_path:
            with open(meta.intent_classifier_path, 'rb') as pickled_classifier:
                classifier = cloudpickle.load(pickled_classifier)

        entity_synonyms = None
        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(meta.entity_synonyms_path)

        # Fall back to a featurizer built on the given pipeline when none is supplied.
        if featurizer is None:
            featurizer = SpacyFeaturizer(nlp)

        return SpacySklearnInterpreter(classifier, extractor, entity_synonyms, featurizer, nlp)
class SpacySklearnInterpreter(Interpreter):
    """Interpreter that classifies intent with a pickled classifier and extracts entities via spaCy."""

    def __init__(self,
                 entity_extractor=None,
                 intent_classifier=None,
                 language_name='en',
                 **kwargs):
        """Load the spaCy pipeline, the pickled intent classifier and the entity extractor.

        :param entity_extractor: forwarded to ``SpacyEntityExtractor`` as its second argument
        :param intent_classifier: path of the pickled classifier; opened in binary mode
        :param language_name: name handed to ``spacy.load``
        :param kwargs: accepted but not read here
        """
        # The parser, entity recogniser and matcher are switched off at load time.
        spacy_options = {'parser': False, 'entity': False, 'matcher': False}
        self.nlp = spacy.load(language_name, **spacy_options)
        self.featurizer = SpacyFeaturizer(self.nlp)

        with open(intent_classifier, 'rb') as pickled_classifier:
            self.classifier = cloudpickle.load(pickled_classifier)

        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)

    def get_intent(self, text):
        """Return the first prediction of the classifier for ``text``."""
        features = self.featurizer.create_bow_vecs([text])
        predictions = self.classifier.predict(features)
        return predictions[0]

    def parse(self, text):
        """Return a dict holding the input ``text``, its intent and its entities."""
        result = {'text': text}
        result['intent'] = self.get_intent(text)
        result['entities'] = self.extractor.extract_entities(self.nlp, text)
        return result
예제 #7
0
class SpacySklearnInterpreter(Interpreter):
    """Interpreter returning intent, confidence and entities for a piece of text."""

    def __init__(self, entity_extractor=None, intent_classifier=None, language_name='en', **kwargs):
        """Load the spaCy pipeline, the pickled intent classifier and the entity extractor.

        :param entity_extractor: forwarded to ``SpacyEntityExtractor`` as its second argument
        :param intent_classifier: path of the pickled classifier; opened in binary mode
        :param language_name: name handed to ``spacy.load``
        :param kwargs: accepted but not read here
        """
        # The parser, entity recogniser and matcher are switched off at load time.
        spacy_options = {'parser': False, 'entity': False, 'matcher': False}
        self.nlp = spacy.load(language_name, **spacy_options)
        self.featurizer = SpacyFeaturizer(self.nlp)

        with open(intent_classifier, 'rb') as pickled_classifier:
            self.classifier = cloudpickle.load(pickled_classifier)

        self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)

    def get_intent(self, text):
        """Classify ``text`` and return the top intent with its probability.

        :param text: text to classify
        :return: tuple of (first intent name, first probability) from the classifier output"""

        features = self.featurizer.create_bow_vecs([text])
        predicted_ids, scores = self.classifier.predict(features)
        # The classifier works on numeric label ids; map them back to names.
        names = self.classifier.transform_labels_num2str(predicted_ids)
        top_intent = names[0]
        top_score = scores[0]
        return top_intent, top_score

    def parse(self, text):
        """Classify ``text``, extract its entities and bundle everything into one dict."""

        top_intent, top_score = self.get_intent(text)
        found_entities = self.extractor.extract_entities(self.nlp, text)

        result = {'text': text, 'intent': top_intent}
        result['entities'] = found_entities
        result['confidence'] = top_score
        return result
class SpacySklearnInterpreter(Interpreter):
    """Interpreter that parses text once with spaCy and derives intent and entities from the doc."""

    def __init__(self,
                 entity_extractor=None,
                 entity_synonyms=None,
                 intent_classifier=None,
                 language_name='en',
                 **kwargs):
        """Load the spaCy pipeline and whichever optional components were given.

        :param entity_extractor: when truthy, forwarded to ``SpacyEntityExtractor``
        :param entity_synonyms: always handed to ``Interpreter.load_synonyms``
        :param intent_classifier: when truthy, path of the pickled classifier to load
        :param language_name: name handed to ``spacy.load``
        :param kwargs: accepted but not read here
        """
        self.extractor = None
        self.classifier = None
        self.ent_synonyms = None

        # The parser, entity recogniser and matcher are switched off at load time.
        spacy_options = {'parser': False, 'entity': False, 'matcher': False}
        self.nlp = spacy.load(language_name, **spacy_options)
        self.featurizer = SpacyFeaturizer(self.nlp)

        ensure_proper_language_model(self.nlp)

        if intent_classifier:
            with open(intent_classifier, 'rb') as pickled_classifier:
                self.classifier = cloudpickle.load(pickled_classifier)
        if entity_extractor:
            self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)
        self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_intent(self, doc):
        """Return the top intent and its probability for an already parsed document.

        :param doc: document produced by ``self.nlp``; its features are classified
        :return: tuple of (intent name, probability); ``("None", 0.0)`` when no
            classifier is loaded"""
        if not self.classifier:
            return "None", 0.0

        # Features are reshaped into a single-row matrix before prediction.
        features = self.featurizer.features_for_doc(doc).reshape(1, -1)
        predicted_ids, scores = self.classifier.predict(features)
        names = self.classifier.transform_labels_num2str(predicted_ids)
        return names[0], scores[0]

    def get_entities(self, doc):
        """Return the extractor's entities for ``doc``, or an empty list without an extractor."""
        if not self.extractor:
            return []
        return self.extractor.extract_entities(doc)

    def parse(self, text):
        """Run spaCy on ``text`` and return a dict with text, intent, entities and confidence."""
        parsed = self.nlp(text)
        top_intent, top_score = self.get_intent(parsed)
        found_entities = self.get_entities(parsed)

        # The return value of replace_synonyms is discarded, so the list is
        # expected to be updated in place.
        if self.ent_synonyms:
            Interpreter.replace_synonyms(found_entities, self.ent_synonyms)

        result = {'text': text, 'intent': top_intent}
        result['entities'] = found_entities
        result['confidence'] = top_score
        return result
예제 #9
0
def test_spacy_ner_extractor(spacy_nlp):
    """Processing the message must leave exactly one entity, matching the expected LOC dict."""
    ext = SpacyEntityExtractor()

    message_data = {
        "intent": "restaurant_search",
        "entities": [],
        "spacy_doc": spacy_nlp("anywhere in the west"),
    }
    example = Message("anywhere in the West", message_data)

    ext.process(example, spacy_nlp=spacy_nlp)

    expected_entity = {
        'start': 16,
        'extractor': 'ner_spacy',
        'end': 20,
        'value': 'West',
        'entity': 'LOC',
        'confidence': None,
    }
    extracted = example.get("entities", [])
    assert len(extracted) == 1
    assert example.get("entities")[0] == expected_entity
예제 #10
0
class SpacySklearnTrainer(Trainer):
    """Trainer that uses spaCy for features and entities and sklearn for intents."""

    SUPPORTED_LANGUAGES = {"en", "de"}

    def __init__(self, language_name, max_num_threads=1):
        """Initialise the base trainer and load the spaCy pipeline.

        :param language_name: forwarded to the base ``Trainer`` constructor
        :param max_num_threads: forwarded to the base ``Trainer`` constructor
        """
        # Name the class explicitly: ``super(self.__class__, self)`` recurses
        # forever as soon as this class is subclassed, because
        # ``self.__class__`` is then the subclass and the lookup lands back
        # on this very ``__init__``.
        super(SpacySklearnTrainer, self).__init__("spacy_sklearn",
                                                  language_name,
                                                  max_num_threads)
        # presumably the base constructor sets ``self.language_name`` — it is
        # read here but not assigned in this class.
        self.nlp = spacy.load(self.language_name, parser=False, entity=False)
        self.featurizer = SpacyFeaturizer(self.nlp)
        ensure_proper_language_model(self.nlp)

    def train_entity_extractor(self, entity_examples):
        """Create a new ``SpacyEntityExtractor`` and train it on ``entity_examples``."""
        self.entity_extractor = SpacyEntityExtractor()
        # NOTE(review): the attribute is rebound to whatever ``train`` returns;
        # confirm that ``train`` returns the extractor and not ``None``,
        # otherwise ``persist`` will silently skip the NER model.
        self.entity_extractor = self.entity_extractor.train(
            self.nlp, entity_examples)

    def train_intent_classifier(self, intent_examples, test_split_size=0.1):
        """Train the intent classifier through ``sklearn_trainer_utils``.

        :param intent_examples: examples forwarded to the training helper
        :param test_split_size: forwarded unchanged to the training helper
        """
        self.intent_classifier = sklearn_trainer_utils.train_intent_classifier(
            intent_examples, self.featurizer, self.max_num_threads,
            test_split_size)

    def persist(self, path, persistor=None, create_unique_subfolder=True):
        """Write training data and whichever trained components exist to disk.

        The intent classifier and the entity extractor are each persisted only
        when the corresponding attribute is truthy; otherwise their paths are
        passed to ``write_training_metadata`` as ``None``.

        :param path: base directory; used directly when ``create_unique_subfolder`` is false
        :param persistor: optional object; its ``send_tar_to_s3`` is called with the
            output directory once all files are written
        :param create_unique_subfolder: when true, a ``model_<timestamp>`` directory is
            created under ``path`` and used as the output directory
        """
        entity_extractor_file, entity_extractor_config_file = None, None
        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

        if create_unique_subfolder:
            dir_name = os.path.join(path, "model_" + timestamp)
            os.mkdir(dir_name)
        else:
            dir_name = path

        data_file = os.path.join(dir_name, "training_data.json")
        classifier_file, ner_dir = None, None
        if self.intent_classifier:
            classifier_file = os.path.join(dir_name, "intent_classifier.pkl")
        if self.entity_extractor:
            ner_dir = os.path.join(dir_name, 'ner')
            if not os.path.exists(ner_dir):
                os.mkdir(ner_dir)
            entity_extractor_config_file = os.path.join(ner_dir, "config.json")
            entity_extractor_file = os.path.join(ner_dir, "model")

        # Metadata is written before the artefacts it points to.
        write_training_metadata(dir_name, timestamp, data_file, self.name,
                                self.language_name, classifier_file, ner_dir)

        with open(data_file, 'w') as f:
            f.write(self.training_data.as_json(indent=2))
        if self.intent_classifier:
            with open(classifier_file, 'wb') as f:
                cloudpickle.dump(self.intent_classifier, f)
        if self.entity_extractor:
            with open(entity_extractor_config_file, 'w') as f:
                json.dump(self.entity_extractor.ner.cfg, f)

            self.entity_extractor.ner.model.dump(entity_extractor_file)

        if persistor is not None:
            persistor.send_tar_to_s3(dir_name)
예제 #11
0
    def __init__(self,
                 entity_extractor=None,
                 entity_synonyms=None,
                 intent_classifier=None,
                 language_name='en',
                 **kwargs):
        """Load the spaCy pipeline and whichever optional components were given.

        :param entity_extractor: when truthy, forwarded to ``SpacyEntityExtractor``
        :param entity_synonyms: always handed to ``Interpreter.load_synonyms``
        :param intent_classifier: when truthy, path of the pickled classifier to load
        :param language_name: name handed to ``spacy.load``
        :param kwargs: accepted but not read here
        """
        self.extractor = None
        self.classifier = None
        self.ent_synonyms = None

        # The parser, entity recogniser and matcher are switched off at load time.
        spacy_options = {'parser': False, 'entity': False, 'matcher': False}
        self.nlp = spacy.load(language_name, **spacy_options)
        self.featurizer = SpacyFeaturizer(self.nlp)

        ensure_proper_language_model(self.nlp)

        if intent_classifier:
            with open(intent_classifier, 'rb') as pickled_classifier:
                self.classifier = cloudpickle.load(pickled_classifier)
        if entity_extractor:
            self.extractor = SpacyEntityExtractor(self.nlp, entity_extractor)
        self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)
예제 #12
0
 def load(meta, nlp):
     """
     Build a SpacySklearnInterpreter from the artefacts referenced by ``meta``.

     Each component whose path on ``meta`` is falsy is passed on as ``None``.

     :type meta: ModelMetadata
     :rtype: SpacySklearnInterpreter
     """
     extractor = None
     if meta.entity_extractor_path:
         extractor = SpacyEntityExtractor(nlp, meta.entity_extractor_path)

     classifier = None
     if meta.intent_classifier_path:
         with open(meta.intent_classifier_path, 'rb') as pickled_classifier:
             classifier = cloudpickle.load(pickled_classifier)

     entity_synonyms = None
     if meta.entity_synonyms_path:
         synonyms_path = meta.entity_synonyms_path
         entity_synonyms = Interpreter.load_synonyms(synonyms_path)

     return SpacySklearnInterpreter(classifier, extractor, entity_synonyms, nlp)
예제 #13
0
 def train_entity_extractor(self, entity_examples):
     """Replace the entity extractor with a new one and train it on ``entity_examples``."""
     extractor = SpacyEntityExtractor()
     # Store the extractor before training, so the attribute is set even if
     # training raises.
     self.entity_extractor = extractor
     extractor.train(self.nlp, entity_examples)
예제 #14
0
 def train_entity_extractor(self, entity_examples):
     """Replace the entity extractor with a new one and train it on ``entity_examples``.

     ``self.should_fine_tune_spacy_ner`` is passed to ``train`` as its third argument.
     """
     extractor = SpacyEntityExtractor()
     # Store the extractor before training, so the attribute is set even if
     # training raises.
     self.entity_extractor = extractor
     extractor.train(self.nlp, entity_examples, self.should_fine_tune_spacy_ner)