def get_slots(self, text):
    """Extracts slots from the provided text

    The text is tokenized, the CRF model predicts one tag per token and the
    decoded tags are converted into slots. When some slot names are mapped
    to builtin entities, the corresponding tags are replaced and the slots
    are computed by ``self._augment_slots`` instead.

    Args:
        text (str): Input

    Returns:
        list of dict: The list of extracted slots

    Raises:
        NotTrained: When the slot filler is not fitted
    """
    if not self.fitted:
        raise NotTrained("CRFSlotFiller must be fitted")

    tokens = tokenize(text, self.language)
    if not tokens:
        # Nothing to tag
        return []

    token_features = self.compute_features(tokens)
    predicted_tags = self.crf_model.predict_single(token_features)
    tags = [_decode_tag(raw_tag) for raw_tag in predicted_tags]
    slots = tags_to_slots(text, tokens, tags, self.config.tagging_scheme,
                          self.slot_name_mapping)

    builtin_slots_names = {
        name for name, entity in iteritems(self.slot_name_mapping)
        if is_builtin_entity(entity)
    }
    if builtin_slots_names:
        # Replace tags corresponding to builtin entities by outside tags
        tags = _replace_builtin_tags(tags, builtin_slots_names)
        return self._augment_slots(text, tokens, tags, builtin_slots_names)

    return slots
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    The intent is classified first, then the slot filler associated with
    the classified intent is used to extract the slots.

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The most likely intent along with the extracted slots. See
        :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("ProbabilisticIntentParser must be fitted")

    logger.debug("Probabilistic intent parser parsing '%s'...", text)

    # A single intent name is treated as a one-element scope
    intents_scope = [intents] if isinstance(intents, str) else intents

    classification = self.intent_classifier.get_intent(text, intents_scope)
    if classification is None:
        return empty_result(text)

    slot_filler = self.slot_fillers[classification[RES_INTENT_NAME]]
    extracted_slots = slot_filler.get_slots(text)
    return parsing_result(text, classification, extracted_slots)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    Intent and slots are extracted simultaneously through pattern matching:
    the first regex matching the text, once builtin entities have been
    replaced, determines both the intent and the slots.

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
        See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("DeterministicIntentParser must be fitted")

    # A single intent name is treated as a one-element scope
    scope = [intents] if isinstance(intents, str) else intents

    ranges_mapping, processed_text = _replace_builtin_entities(
        text, self.language)

    def slot_start(slot):
        return slot[RES_MATCH_RANGE][START]

    for intent_name, patterns in iteritems(self.regexes_per_intent):
        in_scope = scope is None or intent_name in scope
        if not in_scope:
            continue

        for pattern in patterns:
            found = pattern.match(processed_text)
            if found is None:
                continue

            intent_result = intent_classification_result(
                intent_name=intent_name, probability=1.0)

            raw_slots = []
            for group in found.groupdict():
                slot_name = self.group_names_to_slot_names[group]
                entity = self.slot_names_to_entities[slot_name]
                span = (found.start(group), found.end(group))
                if span not in ranges_mapping:
                    # The group was matched on text left untouched by the
                    # builtin entities replacement
                    match_range = {START: span[0], END: span[1]}
                    value = found.group(group)
                else:
                    # Use the mapped range and read the value from the
                    # original text
                    match_range = ranges_mapping[span]
                    value = text[match_range[START]:match_range[END]]
                raw_slots.append(
                    unresolved_slot(match_range=match_range, value=value,
                                    entity=entity, slot_name=slot_name))

            deduplicated = _deduplicate_overlapping_slots(
                raw_slots, self.language)
            ordered_slots = sorted(deduplicated, key=slot_start)
            return parsing_result(text, intent_result, ordered_slots)

    return empty_result(text)
def _get_sequence_probability(self, features, labels):
    """Returns the probability the CRF tagger gives to a label sequence

    Labels which are not in ``self.labels`` are substituted before being
    encoded and passed to the tagger.

    Args:
        features: Features set on the tagger before computing the
            probability
        labels: Sequence of labels to score

    Returns:
        The value returned by the tagger's ``probability`` method

    Raises:
        NotTrained: When the slot filler is not fitted
    """
    if not self.fitted:
        raise NotTrained("CRFSlotFiller must be fitted")

    # Use a default substitution label when a label was not seen during
    # training
    if OUTSIDE in self.labels:
        fallback_label = OUTSIDE
    else:
        fallback_label = self.labels[0]

    encoded_labels = []
    for label in labels:
        if label in self.labels:
            encoded_labels.append(_encode_tag(label))
        else:
            encoded_labels.append(_encode_tag(fallback_label))

    self.crf_model.tagger_.set(features)
    return self.crf_model.tagger_.probability(encoded_labels)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    The intent parsers are called successively and the first non-empty
    result is kept; its slots are then resolved.

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The most likely intent along with the extracted slots. See
        :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the nlu engine is not fitted
        TypeError: When input type is not unicode
    """
    logging.info("NLU engine parsing: '%s'...", text)

    if not isinstance(text, str):
        raise TypeError("Expected unicode but received: %s" % type(text))
    if not self.fitted:
        raise NotTrained("SnipsNLUEngine must be fitted")

    # A single intent name is treated as a one-element scope
    if isinstance(intents, str):
        intents = [intents]

    metadata = self._dataset_metadata
    language = metadata["language_code"]
    entities = metadata["entities"]

    for intent_parser in self.intent_parsers:
        result = intent_parser.parse(text, intents)
        if not is_empty(result):
            slots = result[RES_SLOTS]
            # Only the builtin entities found in the slots are passed as
            # scope to the slots resolution
            builtin_scope = []
            for slot in slots:
                if is_builtin_entity(slot[RES_ENTITY]):
                    builtin_scope.append(slot[RES_ENTITY])
            resolved = resolve_slots(text, slots, entities, language,
                                     builtin_scope)
            return parsing_result(text, intent=result[RES_INTENT],
                                  slots=resolved)

    return empty_result(text)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    Intent and slots are extracted simultaneously through pattern matching.
    Each regex is tried first on the text in which builtin entities have
    been replaced, then on the input text itself.

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
        See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("DeterministicIntentParser must be fitted")

    logger.debug("DeterministicIntentParser parsing '%s'...", text)

    # A single intent name is treated as a one-element scope
    if isinstance(intents, str):
        intents = [intents]

    ranges_mapping, processed_text = _replace_builtin_entities(
        text, self.language)

    # We try to match both the input text and the preprocessed text to
    # cover inconsistencies between labeled data and builtin entity parsing
    cleaned_text = _replace_tokenized_out_characters(text, self.language)
    cleaned_processed_text = _replace_tokenized_out_characters(
        processed_text, self.language)

    for intent_name, patterns in iteritems(self.regexes_per_intent):
        in_scope = intents is None or intent_name in intents
        if not in_scope:
            continue

        for pattern in patterns:
            # First attempt: text with builtin entities replaced
            result = self._get_matching_result(
                text, cleaned_processed_text, pattern, intent_name,
                ranges_mapping)
            if result is not None:
                return result

            # Second attempt: the input text itself
            result = self._get_matching_result(
                text, cleaned_text, pattern, intent_name)
            if result is not None:
                return result

    return empty_result(text)
def get_intent(self, text, intents_filter=None):
    """Performs intent classification on the provided *text*

    Args:
        text (str): Input
        intents_filter (str or list of str): When defined, it will find
            the most likely intent among the list, otherwise it will use
            the whole list of intents defined in the dataset

    Returns:
        dict or None: The most likely intent along with its probability or
        *None* if no intent was found

    Raises:
        NotTrained: When the intent classifier is not fitted
    """
    if not self.fitted:
        raise NotTrained('LogRegIntentClassifier must be fitted')

    # A single intent name is treated as a one-element filter
    if isinstance(intents_filter, str):
        intents_filter = [intents_filter]

    can_classify = (text and self.intent_list
                    and self.featurizer is not None
                    and self.classifier is not None)
    if not can_classify:
        return None

    if len(self.intent_list) == 1:
        # With a single known intent there is nothing to rank
        only_intent = self.intent_list[0]
        return (None if only_intent is None
                else intent_classification_result(only_intent, 1.0))

    features = self.featurizer.transform([text_to_utterance(text)])
    proba_vec = self._predict_proba(features, intents_filter=intents_filter)

    # Rank the intents by decreasing probability
    ranked_intents = sorted(zip(self.intent_list, proba_vec[0]),
                            key=lambda pair: -pair[1])
    for candidate, probability in ranked_intents:
        if candidate is None:
            # The None intent outranks every remaining candidate
            break
        if intents_filter is None or candidate in intents_filter:
            return intent_classification_result(candidate, probability)
    return None