def __init__(self, intent_classifier_file=None, entity_extractor_file=None, feature_extractor_file=None, **kwargs):
    """Load interpreter components from serialized model files.

    :param intent_classifier_file: path to a cloudpickle'd intent classifier
    :param entity_extractor_file: path to a saved MITIE named entity extractor
    :param feature_extractor_file: path to the MITIE total word feature extractor
    """
    # Bug fix: default to None so `self.extractor` always exists — the
    # original left the attribute undefined when no extractor file was given.
    self.extractor = None
    if entity_extractor_file:
        self.extractor = named_entity_extractor(entity_extractor_file)  # ,metadata["feature_extractor"])
    # Bug fix: guard the classifier load — the original called
    # open(None, 'rb') and raised TypeError when no classifier file was given.
    self.classifier = None
    if intent_classifier_file:
        with open(intent_classifier_file, 'rb') as f:
            self.classifier = cloudpickle.load(f)
    self.featurizer = MITIEFeaturizer(feature_extractor_file)
    self.tokenizer = MITIETokenizer()
def __init__(self, intent_classifier=None, entity_extractor=None, entity_synonyms=None):
    """Wire up an interpreter from already-loaded model components.

    Every component is optional; a missing component is stored as None and
    handled by the callers at parse time.
    """
    self.classifier = intent_classifier
    self.extractor = entity_extractor
    self.ent_synonyms = entity_synonyms
    self.tokenizer = MITIETokenizer()
def __init__(self, intent_classifier=None, entity_extractor=None, feature_extractor=None, **kwargs):
    """Load the MITIE intent and entity models from disk and set up tokenization."""
    self.tokenizer = MITIETokenizer()
    self.classifier = text_categorizer(intent_classifier, feature_extractor)
    self.extractor = named_entity_extractor(entity_extractor, feature_extractor)
def find_entity(cls, ent, text):
    """Translate an entity's character span in *text* into token indices.

    :param ent: dict with character offsets under "start" and "end"
    :param text: the example sentence containing the entity
    :return: (start_token_index, end_token_index) pair
    :raises ValueError: when the entity start is not on a token boundary
    """
    tokens, token_offsets = MITIETokenizer().tokenize_with_offsets(text)
    if ent["start"] not in token_offsets:
        # Entities that begin mid-token cannot be expressed as token spans.
        raise ValueError(
            u"invalid entity {0} in example {1}:".format(ent, text) +
            u" entities must span whole tokens")
    first = token_offsets.index(ent["start"])
    span_tokens = tokenize(text[ent["start"]:ent["end"]])
    return first, first + len(span_tokens)
def __init__(self, intent_classifier=None, entity_extractor=None, feature_extractor=None, entity_synonyms=None, **kwargs):
    """Load optional MITIE intent/entity models and entity synonyms.

    :param intent_classifier: path to a MITIE text categorizer model
    :param entity_extractor: path to a MITIE named entity extractor model
    :param feature_extractor: path to the MITIE total word feature extractor
    :param entity_synonyms: path to a synonyms file
    """
    self.extractor = None
    self.classifier = None
    if entity_extractor:
        self.extractor = named_entity_extractor(entity_extractor, feature_extractor)
    if intent_classifier:
        self.classifier = text_categorizer(intent_classifier, feature_extractor)
    self.tokenizer = MITIETokenizer()
    self.ent_synonyms = None
    if entity_synonyms:
        # Bug fix: the loaded synonyms were previously discarded, leaving
        # ent_synonyms permanently None so replacement never happened.
        self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)
class MITIEInterpreter(Interpreter):
    """Interpreter backed by MITIE intent classification and entity extraction."""

    def __init__(self, intent_classifier=None, entity_extractor=None, feature_extractor=None, entity_synonyms=None, **kwargs):
        """Load optional MITIE models; any missing component stays None."""
        self.extractor = None
        self.classifier = None
        if entity_extractor:
            self.extractor = named_entity_extractor(entity_extractor, feature_extractor)
        if intent_classifier:
            self.classifier = text_categorizer(intent_classifier, feature_extractor)
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = None
        if entity_synonyms:
            # Bug fix: the loaded synonyms were previously discarded
            # (the call's return value was never assigned), so
            # replace_synonyms in parse() never ran.
            self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_intent(self, tokens):
        """Return (intent_label, confidence); ("None", 0.0) without a classifier."""
        if self.classifier:
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Run intent classification and entity extraction over *text*."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': score}
def __init__(self, resource_name, backend, language_name):
    """Collect training files for *resource_name* and load them in the detected format.

    :param resource_name: file or directory holding the training data
    :param backend: NLU backend name, used to pick a tokenizer
    :param language_name: language code passed to language-aware tokenizers
    :raises ValueError: when the training file format cannot be recognised
    """
    self.intent_examples = []
    self.entity_examples = []
    self.resource_name = resource_name
    self.language_name = language_name
    self.files = util.recursively_find_files(resource_name)
    self.fformat = self.guess_format(self.files)

    # Pick the tokenizer matching the configured backend; imports are kept
    # local so only the chosen backend's dependencies are required.
    if backend in ['mitie', 'mitie_sklearn']:
        from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
        self.tokenizer = MITIETokenizer()
    elif backend in ['spacy_sklearn']:
        from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
        self.tokenizer = SpacyTokenizer(language_name)
    else:
        from rasa_nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer
        self.tokenizer = WhitespaceTokenizer()
        warnings.warn(
            "backend not recognised by TrainingData : defaulting to tokenizing by splitting on whitespace"
        )

    # Dispatch to the loader for the detected training-data format.
    loaders = {
        'luis': lambda: self.load_luis_data(self.files[0]),
        'wit': lambda: self.load_wit_data(self.files[0]),
        'api': lambda: self.load_api_data(self.files),
        'rasa_nlu': lambda: self.load_data(self.files[0]),
    }
    if self.fformat not in loaders:
        raise ValueError("unknown training file format : {0}".format(
            self.fformat))
    loaders[self.fformat]()
class MITIEInterpreter(Interpreter):
    """Interpreter that maps MITIE entity token ranges back to character offsets."""

    def __init__(self, intent_classifier=None, entity_extractor=None, feature_extractor=None, **kwargs):
        """Load the MITIE intent and entity models and set up tokenization."""
        self.extractor = named_entity_extractor(entity_extractor, feature_extractor)
        self.classifier = text_categorizer(intent_classifier, feature_extractor)
        self.tokenizer = MITIETokenizer()

    def get_entities(self, text):
        """Extract entities and locate their character spans in *text*."""
        tokens = self.tokenizer.tokenize(text)
        ents = []
        entities = self.extractor.extract_entities(tokens)
        for e in entities:
            _range = e[0]
            # Bug fix: escape each token before building the search pattern.
            # Tokens containing regex metacharacters (e.g. "?", "(", "+")
            # previously produced a broken or wrong pattern. Also spell the
            # whitespace class with an explicit backslash so it survives
            # string-literal escaping.
            _regex = u"\\s*".join(re.escape(tokens[i]) for i in _range)
            expr = re.compile(_regex)
            m = expr.search(text)
            start, end = m.start(), m.end()
            ents.append({
                "entity": e[1],
                "value": text[start:end],
                "start": start,
                "end": end
            })
        return ents

    def get_intent(self, text):
        """Return the most likely intent label for *text*."""
        tokens = tokenize(text)
        label, _ = self.classifier(tokens)  # don't use the score
        return label

    def parse(self, text):
        """Classify intent and extract entities from *text*."""
        intent = self.get_intent(text)
        entities = self.get_entities(text)
        return {'text': text, 'intent': intent, 'entities': entities}
class MITIESklearnInterpreter(Interpreter):
    """Interpreter loading MITIE models from a metadata mapping of file paths."""

    def __init__(self, metadata):
        """Load the entity extractor and intent classifier named in *metadata*."""
        self.extractor = named_entity_extractor(
            metadata["entity_extractor"])  # ,metadata["feature_extractor"])
        self.classifier = text_categorizer(
            metadata["intent_classifier"])  # ,metadata["feature_extractor"])
        self.tokenizer = MITIETokenizer()

    def get_entities(self, tokens):
        """Return a mapping of entity label -> matched surface string."""
        found = {}
        for entity in self.extractor.extract_entities(tokens):
            token_range = entity[0]
            label = entity[1]
            found[label] = " ".join(tokens[i] for i in token_range)
        return found

    def get_intent(self, tokens):
        """Return the most likely intent label for the token sequence."""
        label, _ = self.classifier(tokens)  # don't use the score
        return label

    def parse(self, text):
        """Tokenize *text* and run intent classification and entity extraction."""
        tokens = self.tokenizer.tokenize(text)
        return {'intent': self.get_intent(tokens),
                'entities': self.get_entities(tokens)}
def test_mitie():
    """Check MITIE tokenization, including non-Latin text and offsets."""
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
    tk = MITIETokenizer()
    # Bug fix: the comparisons below were bare expressions whose results were
    # discarded, so the test could never fail. Assert them.
    assert tk.tokenize(u"Hi. My name is rasa") == [
        u'Hi', u'My', u'name', u'is', u'rasa'
    ]
    assert tk.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι.") == [u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι']
    assert tk.tokenize_with_offsets(u"Forecast for lunch") == ([
        u'Forecast', u'for', u'lunch'
    ], [0, 9, 13])
def __init__(self, intent_classifier=None, entity_extractor=None, feature_extractor=None, entity_synonyms=None, **kwargs):
    """Load the optional sklearn intent classifier, MITIE entity extractor
    and entity synonyms; missing components are stored as None.
    """
    self.extractor = (named_entity_extractor(entity_extractor, feature_extractor)
                      if entity_extractor else None)
    self.classifier = None
    if intent_classifier:
        # The classifier was serialized with cloudpickle at train time.
        with open(intent_classifier, 'rb') as f:
            self.classifier = cloudpickle.load(f)
    self.featurizer = MITIEFeaturizer(feature_extractor)
    self.tokenizer = MITIETokenizer()
    self.ent_synonyms = (Interpreter.load_synonyms(entity_synonyms)
                         if entity_synonyms else None)
def init_tokenizer(self, backend, nlp):
    """Select and install the tokenizer matching *backend* on ``self.tokenizer``.

    :param backend: NLU backend name constant
    :param nlp: language object handed to the spaCy tokenizer
    """
    # Imports stay local so only the selected backend's package is needed.
    if backend in (mitie.MITIE_BACKEND_NAME, mitie.MITIE_SKLEARN_BACKEND_NAME):
        from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
        self.tokenizer = MITIETokenizer()
    elif backend in (spacy.SPACY_BACKEND_NAME,):
        from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
        self.tokenizer = SpacyTokenizer(nlp)
    else:
        # Unknown backend: degrade gracefully to whitespace splitting.
        from rasa_nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer
        self.tokenizer = WhitespaceTokenizer()
        warnings.warn(
            "backend not recognised by TrainingData : defaulting to tokenizing by splitting on whitespace")
class MITIEInterpreter(Interpreter):
    """Interpreter backed by MITIE models, constructed from model metadata."""

    @staticmethod
    def load(meta):
        """Build a MITIEInterpreter from the paths recorded in *meta*.

        :type meta: ModelMetadata
        :rtype: MITIEInterpreter
        :raises Exception: when an entity extractor is configured without a
            usable feature extractor file
        """
        if meta.entity_extractor_path:
            if meta.feature_extractor_path is None or not os.path.isfile(meta.feature_extractor_path):
                # Bug fix: `... + meta` concatenated a str with the metadata
                # object and raised TypeError instead of this exception.
                raise Exception("Invalid feature extractor path for MITIE model. Meta data: " + str(meta))
            extractor = named_entity_extractor(
                meta.entity_extractor_path, meta.feature_extractor_path)
        else:
            extractor = None

        if meta.intent_classifier_path:
            classifier = text_categorizer(
                meta.intent_classifier_path, meta.feature_extractor_path)
        else:
            classifier = None

        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(meta.entity_synonyms_path)
        else:
            entity_synonyms = None

        return MITIEInterpreter(
            classifier,
            extractor,
            entity_synonyms)

    def __init__(self, intent_classifier=None, entity_extractor=None, entity_synonyms=None):
        """Store already-loaded components; any of them may be None."""
        self.extractor = entity_extractor
        self.classifier = intent_classifier
        self.ent_synonyms = entity_synonyms
        self.tokenizer = MITIETokenizer()

    def get_intent(self, tokens):
        """Return (intent_label, confidence); ("None", 0.0) without a classifier."""
        if self.classifier:
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Classify intent, extract entities and apply synonym replacement."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': score}
def test_mitie():
    """Check MITIE tokenization for ASCII and non-Latin input."""
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
    tk = MITIETokenizer()
    # Bug fix: the comparisons below were bare expressions whose results were
    # discarded, so the test could never fail. Assert them.
    assert tk.tokenize(u"Hi. My name is rasa") == [
        u'Hi', u'My', u'name', u'is', u'rasa'
    ]
    assert tk.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι.") == [u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι']
class MITIESklearnInterpreter(Interpreter):
    """Interpreter combining a MITIE entity extractor with a sklearn intent classifier."""

    def __init__(self, intent_classifier=None, entity_extractor=None, feature_extractor=None, entity_synonyms=None, **kwargs):
        """Load the optional components; missing ones are stored as None."""
        self.extractor = (named_entity_extractor(entity_extractor, feature_extractor)
                          if entity_extractor else None)
        self.classifier = None
        if intent_classifier:
            # The classifier was serialized with cloudpickle at train time.
            with open(intent_classifier, 'rb') as f:
                self.classifier = cloudpickle.load(f)
        self.featurizer = MITIEFeaturizer(feature_extractor)
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = (Interpreter.load_synonyms(entity_synonyms)
                             if entity_synonyms else None)

    def get_intent(self, sentence_tokens):
        """Returns the most likely intent and its probability for the input text.

        :param sentence_tokens: text to classify
        :return: tuple of most likely intent name and its probability"""
        if not self.classifier:
            return "None", 0.0
        features = self.featurizer.features_for_tokens(sentence_tokens).reshape(1, -1)
        intent_ids, probabilities = self.classifier.predict(features)
        labels = self.classifier.transform_labels_num2str(intent_ids)
        return labels[0], probabilities[0]

    def parse(self, text):
        """Classify intent, extract entities and apply synonym replacement."""
        tokens = self.tokenizer.tokenize(text)
        intent, probability = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': probability
        }
def test_mitie_featurizer():
    """Check that MITIE token features match known reference values."""
    from rasa_nlu.featurizers.mitie_featurizer import MITIEFeaturizer
    # Fall back to the checked-in extractor when MITIE_FILE is unset or missing.
    filename = os.environ.get('MITIE_FILE')
    if not filename or not os.path.isfile(filename):
        filename = "data/total_word_feature_extractor.dat"
    featurizer = MITIEFeaturizer(filename)
    tokens = MITIETokenizer().tokenize("Hey how are you today")
    vecs = featurizer.features_for_tokens(tokens)
    expected = np.array([0., -4.4551446, 0.26073121, -1.46632245, -1.84205751])
    assert np.allclose(vecs[:5], expected, atol=1e-5)
def test_mitie():
    """Check MITIE tokenization: plain text, non-Latin script, and offsets."""
    from rasa_nlu.tokenizers.mitie_tokenizer import MITIETokenizer
    tokenizer = MITIETokenizer()
    assert tokenizer.tokenize(u"Hi. My name is rasa") == \
        [u'Hi', u'My', u'name', u'is', u'rasa']
    assert tokenizer.tokenize(u"ὦ ἄνδρες ᾿Αθηναῖοι") == \
        [u'ὦ', u'ἄνδρες', u'᾿Αθηναῖοι']
    # tokenize_with_offsets also reports each token's character offset.
    assert tokenizer.tokenize_with_offsets(u"Forecast for lunch") == \
        ([u'Forecast', u'for', u'lunch'], [0, 9, 13])
    assert tokenizer.tokenize_with_offsets(u"hey ńöñàśçií how're you?") == \
        ([u'hey', u'ńöñàśçií', u'how', u'\'re', 'you', '?'], [0, 4, 13, 16, 20, 23])
class MITIESklearnInterpreter(Interpreter):
    """Interpreter loading a MITIE entity extractor and a pickled sklearn classifier."""

    def __init__(self, intent_classifier_file=None, entity_extractor_file=None, feature_extractor_file=None, **kwargs):
        """Load components from files; missing components are stored as None.

        :param intent_classifier_file: path to a cloudpickle'd intent classifier
        :param entity_extractor_file: path to a saved MITIE entity extractor
        :param feature_extractor_file: path to the MITIE feature extractor
        """
        # Bug fix: always define `self.extractor` — the original left the
        # attribute undefined when no extractor file was given, causing an
        # AttributeError later in get_entities().
        self.extractor = None
        if entity_extractor_file:
            self.extractor = named_entity_extractor(entity_extractor_file)  # ,metadata["feature_extractor"])
        # Bug fix: guard the classifier load — the original called
        # open(None, 'rb') and raised TypeError when no classifier was given.
        self.classifier = None
        if intent_classifier_file:
            with open(intent_classifier_file, 'rb') as f:
                self.classifier = cloudpickle.load(f)
        self.featurizer = MITIEFeaturizer(feature_extractor_file)
        self.tokenizer = MITIETokenizer()

    def get_entities(self, tokens):
        """Return a mapping of entity label -> matched surface string."""
        d = {}
        if self.extractor is None:
            # No trained extractor: report no entities instead of crashing.
            return d
        entities = self.extractor.extract_entities(tokens)
        for e in entities:
            _range = e[0]
            d[e[1]] = " ".join(tokens[i] for i in _range)
        return d

    def get_intent(self, text):
        """Returns the most likely intent and its probability for the input text.

        :param text: text to classify
        :return: tuple of most likely intent name and its probability"""
        if self.classifier:
            X = self.featurizer.create_bow_vecs([text])
            intent_ids, probabilities = self.classifier.predict(X)
            intents = self.classifier.transform_labels_num2str(intent_ids)
            intent, score = intents[0], probabilities[0]
        else:
            intent, score = "None", 0.0
        return intent, score

    def parse(self, text):
        """Classify intent and extract entities from *text*."""
        tokens = self.tokenizer.tokenize(text)
        # NOTE(review): get_intent documents a raw-text parameter and passes
        # it to create_bow_vecs([text]), but it receives the token list here —
        # confirm which input the featurizer actually expects.
        intent, probability = self.get_intent(tokens)
        entities = self.get_entities(tokens)
        return {'text': text, 'intent': intent, 'entities': entities, 'confidence': probability}
def __init__(self, metadata):
    """Load MITIE models from the file paths recorded in the *metadata* dict."""
    self.tokenizer = MITIETokenizer()
    # The feature-extractor argument is currently not passed to either model
    # (see the commented-out ",metadata[\"feature_extractor\"]" call sites).
    self.extractor = named_entity_extractor(metadata["entity_extractor"])
    self.classifier = text_categorizer(metadata["intent_classifier"])
class MITIEInterpreter(Interpreter):
    """Interpreter backed by MITIE models with synonym replacement support."""

    def __init__(self, intent_classifier=None, entity_extractor=None, feature_extractor=None, entity_synonyms=None, **kwargs):
        """Load optional MITIE models and synonyms; missing components stay None."""
        self.extractor = None
        self.classifier = None
        if entity_extractor:
            self.extractor = named_entity_extractor(entity_extractor, feature_extractor)
        if intent_classifier:
            self.classifier = text_categorizer(intent_classifier, feature_extractor)
        self.tokenizer = MITIETokenizer()
        self.ent_synonyms = None
        if entity_synonyms:
            # Bug fix: the loaded synonyms were previously discarded (the
            # call's return value was never assigned), so ent_synonyms stayed
            # None and replace_synonyms in parse() never ran.
            self.ent_synonyms = Interpreter.load_synonyms(entity_synonyms)

    def get_entities(self, text):
        """Extract entities and locate their character spans in *text*."""
        tokens = self.tokenizer.tokenize(text)
        ents = []
        if self.extractor:
            entities = self.extractor.extract_entities(tokens)
            for e in entities:
                _range = e[0]
                # Spell the whitespace class with an explicit backslash so it
                # survives string-literal escaping (u"\s*" relies on an
                # unrecognized escape and warns on newer Pythons).
                _regex = u"\\s*".join(re.escape(tokens[i]) for i in _range)
                expr = re.compile(_regex)
                m = expr.search(text)
                start, end = m.start(), m.end()
                entity_value = text[start:end]
                ents.append({
                    "entity": e[1],
                    "value": entity_value,
                    "start": start,
                    "end": end
                })
        return ents

    def get_intent(self, text):
        """Return (intent_label, confidence); ("None", 0.0) without a classifier."""
        if self.classifier:
            tokens = tokenize(text)
            label, score = self.classifier(tokens)
        else:
            label, score = "None", 0.0
        return label, score

    def parse(self, text):
        """Classify intent, extract entities and apply synonym replacement."""
        intent, score = self.get_intent(text)
        entities = self.get_entities(text)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': score
        }
class MITIESklearnInterpreter(Interpreter):
    """Interpreter combining MITIE entity extraction with a sklearn intent classifier."""

    @staticmethod
    def load(meta, featurizer=None):
        """Build an interpreter from the paths recorded in *meta*.

        :type meta: rasa_nlu.model.Metadata
        :param featurizer: optional pre-built featurizer to reuse
        :rtype: MITIESklearnInterpreter
        """
        extractor = None
        if meta.entity_extractor_path:
            extractor = named_entity_extractor(meta.entity_extractor_path)

        if featurizer is None:
            featurizer = MITIEFeaturizer(meta.feature_extractor_path)

        classifier = None
        if meta.intent_classifier_path:
            # The classifier was serialized with cloudpickle at train time.
            with open(meta.intent_classifier_path, 'rb') as f:
                classifier = cloudpickle.load(f)

        entity_synonyms = None
        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(
                meta.entity_synonyms_path)

        return MITIESklearnInterpreter(classifier, extractor, featurizer,
                                       entity_synonyms)

    def __init__(self, intent_classifier=None, entity_extractor=None, featurizer=None, entity_synonyms=None):
        """Store already-loaded components; any of them may be None."""
        self.classifier = intent_classifier
        self.extractor = entity_extractor
        self.featurizer = featurizer
        self.ent_synonyms = entity_synonyms
        self.tokenizer = MITIETokenizer()

    def get_intent(self, sentence_tokens):
        """Returns the most likely intent and its probability for the input text.

        :param sentence_tokens: text to classify
        :return: tuple of most likely intent name and its probability"""
        if not self.classifier:
            return "None", 0.0
        features = self.featurizer.features_for_tokens(sentence_tokens).reshape(1, -1)
        intent_ids, probabilities = self.classifier.predict(features)
        labels = self.classifier.transform_labels_num2str(intent_ids)
        return labels[0], probabilities[0]

    def parse(self, text):
        """Classify intent, extract entities and apply synonym replacement."""
        tokens = self.tokenizer.tokenize(text)
        intent, confidence = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor, self.featurizer)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': confidence
        }
class MITIEInterpreter(Interpreter):
    """Interpreter backed by MITIE models, sharing a featurizer across components."""

    @staticmethod
    def load(meta, featurizer=None):
        """Build an interpreter from the paths recorded in *meta*.

        :type meta: rasa_nlu.model.Metadata
        :param featurizer: optional pre-built featurizer to reuse
        :rtype: MITIEInterpreter
        """
        extractor = None
        if meta.entity_extractor_path:
            extractor = named_entity_extractor(meta.entity_extractor_path)

        classifier = None
        if meta.intent_classifier_path:
            classifier = text_categorizer(meta.intent_classifier_path)

        if featurizer is None:
            featurizer = MITIEFeaturizer(meta.feature_extractor_path)

        entity_synonyms = None
        if meta.entity_synonyms_path:
            entity_synonyms = Interpreter.load_synonyms(
                meta.entity_synonyms_path)

        return MITIEInterpreter(classifier, extractor, featurizer,
                                entity_synonyms)

    def __init__(self, intent_classifier=None, entity_extractor=None, featurizer=None, entity_synonyms=None):
        """Store already-loaded components; any of them may be None."""
        self.classifier = intent_classifier
        self.extractor = entity_extractor
        self.featurizer = featurizer
        self.ent_synonyms = entity_synonyms
        self.tokenizer = MITIETokenizer()

    def get_intent(self, tokens):
        """Return (intent_label, confidence); ("None", 0.0) without a classifier."""
        if not self.classifier:
            return "None", 0.0
        # The classifier is given the shared MITIE feature extractor directly.
        label, score = self.classifier(tokens, self.featurizer.feature_extractor)
        return label, score

    def parse(self, text):
        """Classify intent, extract entities and apply synonym replacement."""
        tokens = self.tokenizer.tokenize(text)
        intent, score = self.get_intent(tokens)
        entities = get_entities(text, tokens, self.extractor, self.featurizer)
        if self.ent_synonyms:
            Interpreter.replace_synonyms(entities, self.ent_synonyms)
        return {
            'text': text,
            'intent': intent,
            'entities': entities,
            'confidence': score
        }