Exemple #1
0
    def get_slots(self, text):
        """Extracts slots from the provided text

        Returns:
            list of dict: The list of extracted slots

        Raises:
            NotTrained: When the slot filler is not fitted
        """
        if not self.fitted:
            raise NotTrained("CRFSlotFiller must be fitted")
        tokens = tokenize(text, self.language)
        if not tokens:
            return []
        features = self.compute_features(tokens)
        tags = [_decode_tag(tag) for tag in
                self.crf_model.predict_single(features)]
        slots = tags_to_slots(text, tokens, tags, self.config.tagging_scheme,
                              self.slot_name_mapping)

        builtin_slots_names = set(slot_name for (slot_name, entity) in
                                  iteritems(self.slot_name_mapping)
                                  if is_builtin_entity(entity))
        if not builtin_slots_names:
            return slots

        # Replace tags corresponding to builtin entities by outside tags
        tags = _replace_builtin_tags(tags, builtin_slots_names)
        return self._augment_slots(text, tokens, tags, builtin_slots_names)
Exemple #2
0
    def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text* by first classifying
        the intent and then using the correspond slot filler to extract slots

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
                intent parsing to the provided list of intents

        Returns:
            dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("ProbabilisticIntentParser must be fitted")

        logger.debug("Probabilistic intent parser parsing '%s'...", text)

        if isinstance(intents, str):
            intents = [intents]

        intent_result = self.intent_classifier.get_intent(text, intents)
        if intent_result is None:
            return empty_result(text)

        intent_name = intent_result[RES_INTENT_NAME]
        slots = self.slot_fillers[intent_name].get_slots(text)
        return parsing_result(text, intent_result, slots)
    def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text*

        Intent and slots are extracted simultaneously through pattern matching

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

        Returns:
            dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("DeterministicIntentParser must be fitted")

        if isinstance(intents, str):
            intents = [intents]

        ranges_mapping, processed_text = _replace_builtin_entities(
            text, self.language)

        for intent, regexes in iteritems(self.regexes_per_intent):
            if intents is not None and intent not in intents:
                continue
            for regex in regexes:
                match = regex.match(processed_text)
                if match is None:
                    continue
                parsed_intent = intent_classification_result(
                    intent_name=intent, probability=1.0)
                slots = []
                for group_name in match.groupdict():
                    slot_name = self.group_names_to_slot_names[group_name]
                    entity = self.slot_names_to_entities[slot_name]
                    rng = (match.start(group_name), match.end(group_name))
                    value = match.group(group_name)
                    if rng in ranges_mapping:
                        rng = ranges_mapping[rng]
                        value = text[rng[START]:rng[END]]
                    else:
                        rng = {START: rng[0], END: rng[1]}
                    parsed_slot = unresolved_slot(match_range=rng,
                                                  value=value,
                                                  entity=entity,
                                                  slot_name=slot_name)
                    slots.append(parsed_slot)
                parsed_slots = _deduplicate_overlapping_slots(
                    slots, self.language)
                parsed_slots = sorted(parsed_slots,
                                      key=lambda s: s[RES_MATCH_RANGE][START])
                return parsing_result(text, parsed_intent, parsed_slots)
        return empty_result(text)
Exemple #4
0
    def _get_sequence_probability(self, features, labels):
        if not self.fitted:
            raise NotTrained("CRFSlotFiller must be fitted")

        # Use a default substitution label when a label was not seen during
        # training
        substitution_label = OUTSIDE if OUTSIDE in self.labels else \
            self.labels[0]
        cleaned_labels = [
            _encode_tag(substitution_label if l not in self.labels else l)
            for l in labels]
        self.crf_model.tagger_.set(features)
        return self.crf_model.tagger_.probability(cleaned_labels)
Exemple #5
0
    def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text* by calling its intent
        parsers successively

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
                intent parsing to the provided list of intents

        Returns:
            dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the nlu engine is not fitted
            TypeError: When input type is not unicode
        """
        logging.info("NLU engine parsing: '%s'...", text)
        if not isinstance(text, str):
            raise TypeError("Expected unicode but received: %s" % type(text))

        if not self.fitted:
            raise NotTrained("SnipsNLUEngine must be fitted")

        if isinstance(intents, str):
            intents = [intents]

        language = self._dataset_metadata["language_code"]
        entities = self._dataset_metadata["entities"]

        for parser in self.intent_parsers:
            res = parser.parse(text, intents)
            if is_empty(res):
                continue
            slots = res[RES_SLOTS]
            scope = [
                s[RES_ENTITY] for s in slots
                if is_builtin_entity(s[RES_ENTITY])
            ]
            resolved_slots = resolve_slots(text, slots, entities, language,
                                           scope)
            return parsing_result(text,
                                  intent=res[RES_INTENT],
                                  slots=resolved_slots)
        return empty_result(text)
    def parse(self, text, intents=None):
        """Performs intent parsing on the provided *text*

        Intent and slots are extracted simultaneously through pattern matching

        Args:
            text (str): Input
            intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

        Returns:
            dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

        Raises:
            NotTrained: When the intent parser is not fitted
        """
        if not self.fitted:
            raise NotTrained("DeterministicIntentParser must be fitted")
        logger.debug("DeterministicIntentParser parsing '%s'...", text)

        if isinstance(intents, str):
            intents = [intents]

        ranges_mapping, processed_text = _replace_builtin_entities(
            text, self.language)

        # We try to match both the input text and the preprocessed text to
        # cover inconsistencies between labeled data and builtin entity parsing
        cleaned_text = _replace_tokenized_out_characters(text, self.language)
        cleaned_processed_text = _replace_tokenized_out_characters(
            processed_text, self.language)

        for intent, regexes in iteritems(self.regexes_per_intent):
            if intents is not None and intent not in intents:
                continue
            for regex in regexes:
                res = self._get_matching_result(text, cleaned_processed_text,
                                                regex, intent, ranges_mapping)
                if res is None:
                    res = self._get_matching_result(text, cleaned_text, regex,
                                                    intent)
                if res is not None:
                    return res
        return empty_result(text)
    def get_intent(self, text, intents_filter=None):
        """Performs intent classification on the provided *text*

        Args:
            text (str): Input
            intents_filter (str or list of str): When defined, it will find
                the most likely intent among the list, otherwise it will use
                the whole list of intents defined in the dataset

        Returns:
            dict or None: The most likely intent along with its probability or
            *None* if no intent was found

        Raises:
            NotTrained: When the intent classifier is not fitted

        """
        if not self.fitted:
            raise NotTrained('LogRegIntentClassifier must be fitted')

        if isinstance(intents_filter, str):
            intents_filter = [intents_filter]

        if not text or not self.intent_list \
                or self.featurizer is None or self.classifier is None:
            return None

        if len(self.intent_list) == 1:
            if self.intent_list[0] is None:
                return None
            return intent_classification_result(self.intent_list[0], 1.0)

        # pylint: disable=C0103
        X = self.featurizer.transform([text_to_utterance(text)])
        # pylint: enable=C0103
        proba_vec = self._predict_proba(X, intents_filter=intents_filter)
        intents_probas = sorted(zip(self.intent_list, proba_vec[0]),
                                key=lambda p: -p[1])
        for intent, proba in intents_probas:
            if intent is None:
                return None
            if intents_filter is None or intent in intents_filter:
                return intent_classification_result(intent, proba)
        return None