コード例 #1
0
ファイル: test_tokenizers.py プロジェクト: Kevark/rasa_nlu
def test_mitie():
    """Check that MITIETokenizer splits English and Greek text into words."""
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
    tk = MITIETokenizer()

    # Each comparison must be asserted: a bare `==` expression statement is
    # evaluated and then thrown away, so the test could never fail.
    assert tk.tokenize(u"Hi. My name is rasa") == [
        u'Hi', u'My', u'name', u'is', u'rasa'
    ]
    # The input carries no trailing punctuation, so the expected result is
    # exactly the three words.
    assert tk.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι") == [
        u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι'
    ]
コード例 #2
0
def test_mitie():
    """Check MITIETokenizer word splitting and the offsets it reports."""
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
    tk = MITIETokenizer()

    # Each comparison must be asserted: a bare `==` expression statement is
    # evaluated and then thrown away, so the test could never fail.
    assert tk.tokenize(u"Hi. My name is rasa") == [
        u'Hi', u'My', u'name', u'is', u'rasa'
    ]
    # The input carries no trailing punctuation, so the expected result is
    # exactly the three words.
    assert tk.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι") == [
        u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι'
    ]
    # tokenize_with_offsets is compared against a (tokens, offsets) pair;
    # the expected offsets are the start index of each word in the input.
    assert tk.tokenize_with_offsets(u"Forecast for lunch") == ([
        u'Forecast', u'for', u'lunch'
    ], [0, 9, 13])
コード例 #3
0
class MITIESklearnInterpreter(Interpreter):
    """Interpreter that tokenizes text and runs an entity extractor and an
    intent classifier over the resulting tokens."""

    def __init__(self, metadata):
        """Build the models named in ``metadata``.

        :param metadata: mapping read for the keys "entity_extractor" and
            "intent_classifier"; each value is passed to the matching
            model constructor.
        """
        extractor_source = metadata["entity_extractor"]
        self.extractor = named_entity_extractor(extractor_source)
        classifier_source = metadata["intent_classifier"]
        self.classifier = text_categorizer(classifier_source)
        self.tokenizer = MITIETokenizer()

    def get_entities(self, tokens):
        """Return a dict mapping each extracted entity label to its text.

        The text is the entity's tokens joined by single spaces. A label
        that occurs more than once keeps only its last occurrence.
        """
        found = {}
        for extracted in self.extractor.extract_entities(tokens):
            token_indices = extracted[0]
            found[extracted[1]] = " ".join(
                tokens[idx] for idx in token_indices)
        return found

    def get_intent(self, tokens):
        """Return the label predicted by the classifier for ``tokens``."""
        # The classifier yields a (label, score) pair; the score is dropped.
        label, _score = self.classifier(tokens)
        return label

    def parse(self, text):
        """Tokenize ``text`` and return its intent and entities as a dict."""
        tokens = self.tokenizer.tokenize(text)
        result = {'intent': self.get_intent(tokens)}
        result['entities'] = self.get_entities(tokens)
        return result
コード例 #4
0
def test_mitie():
    """Check MITIETokenizer word splitting and the offsets it reports."""
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
    tk = MITIETokenizer()

    # Plain tokenization: English with a sentence break, then Greek.
    english_tokens = tk.tokenize(u"Hi. My name is rasa")
    assert english_tokens == [u'Hi', u'My', u'name', u'is', u'rasa']

    greek_tokens = tk.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι")
    assert greek_tokens == [u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι']

    # Tokenization with offsets is compared against a (tokens, offsets) pair.
    forecast_words = [u'Forecast', u'for', u'lunch']
    forecast_result = tk.tokenize_with_offsets(u"Forecast for lunch")
    assert forecast_result == (forecast_words, [0, 9, 13])

    # The expected offsets count characters: the 8-character accented word
    # starts at 4 and the following word starts at 13.
    accented_words = [u'hey', u'ńöñàśçií', u'how', u'\'re', 'you', '?']
    accented_offsets = [0, 4, 13, 16, 20, 23]
    accented_result = tk.tokenize_with_offsets(u"hey ńöñàśçií how're you?")
    assert accented_result == (accented_words, accented_offsets)
コード例 #5
0
class MITIEInterpreter(Interpreter):
    """Interpreter with an optional entity extractor, intent classifier and
    entity-synonym table."""

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 feature_extractor=None,
                 entity_synonyms=None,
                 **kwargs):
        """Build whichever models were supplied.

        :param intent_classifier: passed to ``text_categorizer`` when truthy
        :param entity_extractor: passed to ``named_entity_extractor`` when
            truthy
        :param feature_extractor: second argument for both constructors
        :param entity_synonyms: passed to ``Interpreter.load_synonyms`` when
            truthy
        :param kwargs: accepted and ignored
        """
        self.extractor = None
        self.classifier = None
        if entity_extractor:
            self.extractor = named_entity_extractor(entity_extractor, feature_extractor)
        if intent_classifier:
            self.classifier = text_categorizer(intent_classifier, feature_extractor)
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = None
        if entity_synonyms:
            # Keep the loaded table: discarding the return value left
            # ent_synonyms as None, so parse() never replaced synonyms.
            self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_intent(self, tokens):
        """Return ``(label, score)`` for ``tokens``.

        Without a classifier the result is ``("None", 0.0)``.
        """
        if self.classifier:
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Tokenize ``text`` and return a dict with the keys 'text',
        'intent', 'entities' and 'confidence'."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        # get_entities here is the module-level helper, not a method.
        entities = get_entities(text, tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': score}
コード例 #6
0
class MITIEInterpreter(Interpreter):
    """Interpreter that extracts entities with character offsets and
    predicts an intent label for a text."""

    def __init__(self, intent_classifier=None, entity_extractor=None, feature_extractor=None, **kwargs):
        """Build the entity extractor, the classifier and the tokenizer.

        :param intent_classifier: first argument for ``text_categorizer``
        :param entity_extractor: first argument for
            ``named_entity_extractor``
        :param feature_extractor: second argument for both constructors
        :param kwargs: accepted and ignored
        """
        self.extractor = named_entity_extractor(entity_extractor, feature_extractor)
        self.classifier = text_categorizer(intent_classifier, feature_extractor)
        self.tokenizer = MITIETokenizer()

    def get_entities(self, text):
        """Return the entities found in ``text``.

        :param text: the raw input string
        :return: list of dicts with the keys "entity", "value", "start"
            and "end"; start/end are the span of the regex match in ``text``
        """
        tokens = self.tokenizer.tokenize(text)
        ents = []
        for e in self.extractor.extract_entities(tokens):
            _range = e[0]
            # Escape every token so regex metacharacters in the input
            # ("?", "(", "+", ...) are matched literally instead of breaking
            # or altering the pattern. "\\s*" is the same pattern text as
            # the former "\s*" without relying on an invalid string escape.
            _regex = u"\\s*".join(re.escape(tokens[i]) for i in _range)
            m = re.search(_regex, text)
            if m is None:
                # The tokens could not be located in the raw text; skip this
                # entity rather than fail on None.start().
                continue
            start, end = m.start(), m.end()
            ents.append({
                "entity": e[1],
                "value": text[start:end],
                "start": start,
                "end": end
            })

        return ents

    def get_intent(self, text):
        """Return the label the classifier predicts for ``text``."""
        # NOTE(review): this uses the module-level `tokenize`, whereas
        # get_entities uses self.tokenizer -- confirm that is intended.
        tokens = tokenize(text)
        label, _ = self.classifier(tokens)  # don't use the score
        return label

    def parse(self, text):
        """Return a dict with the keys 'text', 'intent' and 'entities'."""
        intent = self.get_intent(text)
        entities = self.get_entities(text)

        return {'text': text, 'intent': intent, 'entities': entities}
コード例 #7
0
class MITIEInterpreter(Interpreter):
    """Interpreter holding an optional entity extractor, intent classifier
    and entity-synonym table, built from model metadata via ``load``."""

    @staticmethod
    def load(meta):
        """Build an interpreter from the paths stored on ``meta``.

        :type meta: ModelMetadata
        :rtype: MITIEInterpreter
        :raises Exception: if an entity extractor path is set but the
            feature extractor path is missing or is not an existing file
        """
        if meta.entity_extractor_path:
            if meta.feature_extractor_path is None or not os.path.isfile(meta.feature_extractor_path):
                # Format the metadata into the message: concatenating a str
                # with the metadata object raised TypeError and hid the
                # real error.
                raise Exception(
                    "Invalid feature extractor path for MITIE model. "
                    "Meta data: {}".format(meta))
            extractor = named_entity_extractor(
                meta.entity_extractor_path, meta.feature_extractor_path)
        else:
            extractor = None
        if meta.intent_classifier_path:
            classifier = text_categorizer(
                meta.intent_classifier_path, meta.feature_extractor_path)
        else:
            classifier = None
        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(meta.entity_synonyms_path)
        else:
            entity_synonyms = None
        return MITIEInterpreter(
            classifier,
            extractor,
            entity_synonyms)

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 entity_synonyms=None):
        """Store the already-built components and create a tokenizer.

        :param intent_classifier: callable classifier, or None
        :param entity_extractor: entity extractor object, or None
        :param entity_synonyms: synonym table, or None
        """
        self.extractor = entity_extractor
        self.classifier = intent_classifier
        self.ent_synonyms = entity_synonyms
        self.tokenizer = MITIETokenizer()

    def get_intent(self, tokens):
        """Return ``(label, score)`` for ``tokens``.

        Without a classifier the result is ``("None", 0.0)``.
        """
        if self.classifier:
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Tokenize ``text`` and return a dict with the keys 'text',
        'intent', 'entities' and 'confidence'."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        # get_entities here is the module-level helper, not a method.
        entities = get_entities(text, tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': score}
コード例 #8
0
class MITIESklearnInterpreter(Interpreter):
    """Interpreter combining an optional entity extractor with an optional
    pickled intent classifier fed by a featurizer."""

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 feature_extractor=None,
                 entity_synonyms=None,
                 **kwargs):
        """Build whichever components were supplied.

        :param intent_classifier: path of a pickled classifier, opened in
            binary mode and loaded with cloudpickle when truthy
        :param entity_extractor: passed to ``named_entity_extractor`` when
            truthy
        :param feature_extractor: passed to ``named_entity_extractor`` and
            to ``MITIEFeaturizer``
        :param entity_synonyms: passed to ``Interpreter.load_synonyms`` when
            truthy
        :param kwargs: accepted and ignored
        """
        self.extractor = (
            named_entity_extractor(entity_extractor, feature_extractor)
            if entity_extractor else None)

        self.classifier = None
        if intent_classifier:
            with open(intent_classifier, 'rb') as model_file:
                self.classifier = cloudpickle.load(model_file)

        self.featurizer = MITIEFeaturizer(feature_extractor)
        self.tokenizer = MITIETokenizer()

        self.ent_synonyms = (
            Interpreter.load_synonyms(entity_synonyms)
            if entity_synonyms else None)

    def get_intent(self, sentence_tokens):
        """Return the most likely intent and its probability.

        :param sentence_tokens: tokens of the text to classify
        :return: tuple of intent name and probability; ``("None", 0.0)``
            when no classifier is loaded"""
        if not self.classifier:
            return "None", 0.0

        features = self.featurizer.features_for_tokens(sentence_tokens)
        # The classifier is given a single row holding all the features.
        X = features.reshape(1, -1)
        intent_ids, probabilities = self.classifier.predict(X)
        intents = self.classifier.transform_labels_num2str(intent_ids)
        return intents[0], probabilities[0]

    def parse(self, text):
        """Tokenize ``text`` and return a dict with the keys 'text',
        'intent', 'entities' and 'confidence'."""
        tokens = self.tokenizer.tokenize(text)
        intent_name, confidence = self.get_intent(tokens)
        # get_entities here is the module-level helper, not a method.
        found_entities = get_entities(text, tokens, self.extractor)

        synonyms = self.ent_synonyms
        if synonyms:
            Interpreter.replace_synonyms(found_entities, synonyms)

        return {
            'text': text,
            'intent': intent_name,
            'entities': found_entities,
            'confidence': confidence
        }
コード例 #9
0
class MITIESklearnInterpreter(Interpreter):
    """Interpreter combining an optional entity extractor with an optional
    pickled intent classifier fed by a featurizer."""

    def __init__(self, intent_classifier_file=None, entity_extractor_file=None, feature_extractor_file=None, **kwargs):
        """Build whichever components were supplied.

        :param intent_classifier_file: path of a pickled classifier, loaded
            with cloudpickle when truthy
        :param entity_extractor_file: passed to ``named_entity_extractor``
            when truthy
        :param feature_extractor_file: passed to ``MITIEFeaturizer``
        :param kwargs: accepted and ignored
        """
        # Always define both attributes so get_entities / get_intent can
        # test them instead of failing with AttributeError.
        self.extractor = None
        self.classifier = None
        if entity_extractor_file:
            self.extractor = named_entity_extractor(entity_extractor_file)
        # The file argument defaults to None; only open it when one was
        # given (get_intent already copes with a missing classifier).
        if intent_classifier_file:
            with open(intent_classifier_file, 'rb') as f:
                self.classifier = cloudpickle.load(f)
        self.featurizer = MITIEFeaturizer(feature_extractor_file)
        self.tokenizer = MITIETokenizer()

    def get_entities(self, tokens):
        """Return a dict mapping each extracted entity label to its text.

        The text is the entity's tokens joined by single spaces. A label
        that occurs more than once keeps only its last occurrence. Returns
        an empty dict when no extractor is loaded.
        """
        if self.extractor is None:
            return {}
        d = {}
        for e in self.extractor.extract_entities(tokens):
            _range = e[0]
            d[e[1]] = " ".join(tokens[i] for i in _range)
        return d

    def get_intent(self, text):
        """Returns the most likely intent and its probability for the input text.

        :param text: text to classify
        :return: tuple of most likely intent name and its probability;
            ``("None", 0.0)`` when no classifier is loaded"""
        # NOTE(review): parse() passes the token list here rather than the
        # raw string -- confirm create_bow_vecs accepts that.
        if self.classifier:
            X = self.featurizer.create_bow_vecs([text])
            intent_ids, probabilities = self.classifier.predict(X)
            intents = self.classifier.transform_labels_num2str(intent_ids)
            intent, score = intents[0], probabilities[0]
        else:
            intent, score = "None", 0.0

        return intent, score

    def parse(self, text):
        """Tokenize ``text`` and return a dict with the keys 'text',
        'intent', 'entities' and 'confidence'."""
        tokens = self.tokenizer.tokenize(text)
        intent, probability = self.get_intent(tokens)
        entities = self.get_entities(tokens)

        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': probability}
コード例 #10
0
class MITIEInterpreter(Interpreter):
    """Interpreter with an optional entity extractor, intent classifier and
    entity-synonym table; entities are reported with character offsets."""

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 feature_extractor=None,
                 entity_synonyms=None,
                 **kwargs):
        """Build whichever models were supplied.

        :param intent_classifier: passed to ``text_categorizer`` when truthy
        :param entity_extractor: passed to ``named_entity_extractor`` when
            truthy
        :param feature_extractor: second argument for both constructors
        :param entity_synonyms: passed to ``Interpreter.load_synonyms`` when
            truthy
        :param kwargs: accepted and ignored
        """
        self.extractor = None
        self.classifier = None
        if entity_extractor:
            self.extractor = named_entity_extractor(entity_extractor,
                                                    feature_extractor)
        if intent_classifier:
            self.classifier = text_categorizer(intent_classifier,
                                               feature_extractor)
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = None
        if entity_synonyms:
            # Keep the loaded table: discarding the return value left
            # ent_synonyms as None, so parse() never replaced synonyms.
            self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_entities(self, text):
        """Return the entities found in ``text``.

        :param text: the raw input string
        :return: list of dicts with the keys "entity", "value", "start"
            and "end"; empty when no extractor is loaded
        """
        tokens = self.tokenizer.tokenize(text)
        ents = []
        if self.extractor:
            entities = self.extractor.extract_entities(tokens)
            for e in entities:
                _range = e[0]
                # "\\s*" is the same pattern text as the former "\s*"
                # without relying on an invalid string escape.
                _regex = u"\\s*".join(re.escape(tokens[i]) for i in _range)
                m = re.search(_regex, text)
                if m is None:
                    # The tokens could not be located in the raw text; skip
                    # this entity rather than fail on None.start().
                    continue
                start, end = m.start(), m.end()
                entity_value = text[start:end]
                ents.append({
                    "entity": e[1],
                    "value": entity_value,
                    "start": start,
                    "end": end
                })

        return ents

    def get_intent(self, text):
        """Return ``(label, score)`` for ``text``.

        Without a classifier the result is ``("None", 0.0)``.
        """
        if self.classifier:
            # NOTE(review): this uses the module-level `tokenize`, whereas
            # get_entities uses self.tokenizer -- confirm that is intended.
            tokens = tokenize(text)
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Return a dict with the keys 'text', 'intent', 'entities' and
        'confidence' for ``text``."""
        intent, score = self.get_intent(text)
        entities = self.get_entities(text)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)

        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': score
        }
コード例 #11
0
class MITIESklearnInterpreter(Interpreter):
    """Interpreter combining an optional entity extractor with an optional
    pickled intent classifier fed by a featurizer."""

    @staticmethod
    def load(meta, featurizer=None):
        """Build an interpreter from the paths stored on ``meta``.

        A featurizer is created from ``meta.feature_extractor_path`` only
        when none is passed in.

        :type meta: rasa_nlu.model.Metadata
        :rtype: MITIESklearnInterpreter
        """
        extractor = (named_entity_extractor(meta.entity_extractor_path)
                     if meta.entity_extractor_path else None)

        if featurizer is None:
            featurizer = MITIEFeaturizer(meta.feature_extractor_path)

        classifier = None
        if meta.intent_classifier_path:
            with open(meta.intent_classifier_path, 'rb') as model_file:
                classifier = cloudpickle.load(model_file)

        entity_synonyms = (
            Interpreter.load_synonyms(meta.entity_synonyms_path)
            if meta.entity_synonyms_path else None)

        return MITIESklearnInterpreter(
            intent_classifier=classifier,
            entity_extractor=extractor,
            featurizer=featurizer,
            entity_synonyms=entity_synonyms)

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 featurizer=None,
                 entity_synonyms=None):
        """Store the already-built components and create a tokenizer."""
        self.tokenizer = MITIETokenizer()
        self.classifier = intent_classifier
        self.extractor = entity_extractor
        self.featurizer = featurizer
        self.ent_synonyms = entity_synonyms

    def get_intent(self, sentence_tokens):
        """Return the most likely intent and its probability.

        :param sentence_tokens: tokens of the text to classify
        :return: tuple of intent name and probability; ``("None", 0.0)``
            when no classifier is set"""
        if not self.classifier:
            return "None", 0.0

        features = self.featurizer.features_for_tokens(sentence_tokens)
        # The classifier is given a single row holding all the features.
        X = features.reshape(1, -1)
        intent_ids, probabilities = self.classifier.predict(X)
        intents = self.classifier.transform_labels_num2str(intent_ids)
        return intents[0], probabilities[0]

    def parse(self, text):
        """Tokenize ``text`` and return a dict with the keys 'text',
        'intent', 'entities' and 'confidence'."""
        tokens = self.tokenizer.tokenize(text)
        intent_name, confidence = self.get_intent(tokens)
        # get_entities here is the module-level helper, not a method.
        found_entities = get_entities(
            text, tokens, self.extractor, self.featurizer)

        synonyms = self.ent_synonyms
        if synonyms:
            Interpreter.replace_synonyms(found_entities, synonyms)

        return {
            'text': text,
            'intent': intent_name,
            'entities': found_entities,
            'confidence': confidence
        }
コード例 #12
0
class MITIEInterpreter(Interpreter):
    """Interpreter holding an optional entity extractor, intent classifier,
    featurizer and entity-synonym table."""

    @staticmethod
    def load(meta, featurizer=None):
        """Build an interpreter from the paths stored on ``meta``.

        A featurizer is created from ``meta.feature_extractor_path`` only
        when none is passed in.

        :type meta: rasa_nlu.model.Metadata
        :rtype: MITIEInterpreter
        """
        extractor = (named_entity_extractor(meta.entity_extractor_path)
                     if meta.entity_extractor_path else None)

        classifier = (text_categorizer(meta.intent_classifier_path)
                      if meta.intent_classifier_path else None)

        if featurizer is None:
            featurizer = MITIEFeaturizer(meta.feature_extractor_path)

        entity_synonyms = (
            Interpreter.load_synonyms(meta.entity_synonyms_path)
            if meta.entity_synonyms_path else None)

        return MITIEInterpreter(
            intent_classifier=classifier,
            entity_extractor=extractor,
            featurizer=featurizer,
            entity_synonyms=entity_synonyms)

    def __init__(self,
                 intent_classifier=None,
                 entity_extractor=None,
                 featurizer=None,
                 entity_synonyms=None):
        """Store the already-built components and create a tokenizer."""
        self.tokenizer = MITIETokenizer()
        self.classifier = intent_classifier
        self.extractor = entity_extractor
        self.featurizer = featurizer
        self.ent_synonyms = entity_synonyms

    def get_intent(self, tokens):
        """Return ``(label, score)`` for ``tokens``.

        The classifier is called with the tokens and the featurizer's
        ``feature_extractor``. Without a classifier the result is
        ``("None", 0.0)``.
        """
        if not self.classifier:
            return "None", 0.0

        label, score = self.classifier(
            tokens, self.featurizer.feature_extractor)
        return label, score

    def parse(self, text):
        """Tokenize ``text`` and return a dict with the keys 'text',
        'intent', 'entities' and 'confidence'."""
        tokens = self.tokenizer.tokenize(text)
        intent_name, confidence = self.get_intent(tokens)
        # get_entities here is the module-level helper, not a method.
        found_entities = get_entities(
            text, tokens, self.extractor, self.featurizer)

        synonyms = self.ent_synonyms
        if synonyms:
            Interpreter.replace_synonyms(found_entities, synonyms)

        return {
            'text': text,
            'intent': intent_name,
            'entities': found_entities,
            'confidence': confidence
        }