예제 #1
0
    def detect_bulk(self, messages=None, **kwargs):
        """
        Run entity detection over a batch of messages.

        When the detector's language differs from its processing language and translation is
        enabled, every message is first passed through translate_text. A message whose translation
        reports a falsy 'status' is replaced by an empty string, so output positions still line up
        with the input positions.

        Args:
            messages (list of str): natural texts to run detection on. None is treated as an empty list.
            **kwargs: forwarded unchanged to self.detect_entity for every message.

        Returns:
            The value built by self.output_entity_bulk from the per-message detections, all tagged
            with the FROM_MESSAGE detection method. Judging by the example below, this is a list
            holding one list of entity dicts (entity_value, original_text, detection) per message.

        Example:
            Restaurant detection on a single-message batch (illustrative output):

                messages = ['i want to order chinese from  mainland china and pizza from domminos']
                output = detector.detect_bulk(messages=messages)
                print(output)
                    >> [[{'detection': 'message', 'original_text': 'mainland china', 'entity_value':
                    {'value': u'Mainland China'}}, {'detection': 'message', 'original_text': 'domminos',
                    'entity_value': {'value': u"Domino's Pizza"}}]]
        """
        if messages is None:
            messages = []

        if self._language != self._processing_language and self._translation_enabled:
            # Translate the whole batch first, then keep only the successful translations.
            translations = [
                translate_text(raw_message, self._language, self._processing_language)
                for raw_message in messages
            ]
            messages = [
                translation[TRANSLATED_TEXT] if translation['status'] else ''
                for translation in translations
            ]

        detected_values, detected_texts = [], []
        for message in messages:
            values, originals = self.detect_entity(text=message, **kwargs)
            detected_values.append(values)
            detected_texts.append(originals)

        return self.output_entity_bulk(
            entity_values_list=detected_values,
            original_texts_list=detected_texts,
            detection_method=FROM_MESSAGE,
            detection_language=self._processing_language)
예제 #2
0
    def detect(self,
               message=None,
               structured_value=None,
               fallback_value=None,
               **kwargs):
        """
        Detect entities from a structured value, a message or a fallback value, in that order.

        When the detector's language differs from its processing language and translation is
        enabled, the text that will be used for detection is translated first: structured_value if
        it is truthy, otherwise message. A translation that reports a falsy 'status' replaces that
        input with None.

        Selection rules, applied after the optional translation:
            1. structured_value is truthy: detection runs on it. If entities are found they are
               returned with method FROM_STRUCTURE_VALUE_VERIFIED; otherwise structured_value itself
               is returned as both value and original text with method
               FROM_STRUCTURE_VALUE_NOT_VERIFIED. The message is not consulted in this case.
            2. Otherwise detection runs on message; found entities are returned with FROM_MESSAGE.
            3. Otherwise, if fallback_value is truthy, detection runs on fallback_value and its
               result (possibly empty) is returned with FROM_FALLBACK_VALUE.
            4. Otherwise None is returned.

        Args:
            message (str): natural text on which detection logic is to be run. Ignored when
                           structured_value is truthy.
            structured_value (str): value obtained from a structured element (for example UI elements
                                    like form, payload, etc). Takes precedence over message.
            fallback_value (str): text to run detection on when neither structured_value nor message
                                  yields anything.
            **kwargs: accepted (e.g. bot_message) but not read by this method.

        Returns:
            None when rule 4 applies; otherwise the value built by self.output_entity_dict_list.
            Judging by the example below, this is a list of dicts with entity_value, original_text
            and detection keys.

        Example:
            Restaurant detection from a message (illustrative output):

                message = 'i want to order chinese from  mainland china and pizza from domminos'
                output = detector.detect(message=message, structured_value=None, fallback_value=None)
                print(output)

                    >> [{'detection': 'message', 'original_text': 'mainland china', 'entity_value':
                    {'value': u'Mainland China'}}, {'detection': 'message', 'original_text': 'domminos',
                    'entity_value': {'value': u"Domino's Pizza"}}]
        """
        needs_translation = self._language != self._processing_language and self._translation_enabled
        if needs_translation:
            if structured_value:
                translated = translate_text(structured_value,
                                            self._language,
                                            self._processing_language)
                if translated['status']:
                    structured_value = translated[TRANSLATED_TEXT]
                else:
                    structured_value = None
            elif message:
                translated = translate_text(message,
                                            self._language,
                                            self._processing_language)
                if translated['status']:
                    message = translated[TRANSLATED_TEXT]
                else:
                    message = None

        # NOTE(review): text can be None here (no inputs, or a failed translation); this relies on
        # detect_entity coping with that - confirm against the concrete detectors.
        text = structured_value or message
        detected, detected_texts = self.detect_entity(text=text)

        if structured_value:
            if detected:
                method = FROM_STRUCTURE_VALUE_VERIFIED
            else:
                # Nothing recognised: echo the structured value back, flagged as unverified.
                method = FROM_STRUCTURE_VALUE_NOT_VERIFIED
                detected, detected_texts = [structured_value], [structured_value]
        elif detected:
            method = FROM_MESSAGE
        elif fallback_value:
            detected, detected_texts = self.detect_entity(text=fallback_value)
            method = FROM_FALLBACK_VALUE
        else:
            return None

        return self.output_entity_dict_list(
            entity_value_list=detected,
            original_text_list=detected_texts,
            detection_method=method,
            detection_language=self._processing_language)
예제 #3
0
    def detect_bulk(self, messages=None, predetected_values=None, **kwargs):
        """
        Run bulk entity detection over a batch of messages, with optional per-message fallbacks.

        When the source and target language scripts differ and translation is enabled, every message
        is first passed through translate_text. A message whose translation reports a falsy 'status'
        is replaced by an empty string, so output positions still line up with the input positions.

        For each message position the result is chosen as follows:
            - entities detected by self.detect_entity_bulk, tagged FROM_MESSAGE;
            - otherwise the truthy entry at the same position of fallback_values, used as both value
              and original text, tagged FROM_FALLBACK_VALUE;
            - otherwise empty lists with a detection method of None.

        Args:
            messages (list of strings): list of natural text(s) on which detection logic is to be run.
                                        None is treated as an empty list.
            predetected_values (list of list of str): prior detection results (e.g. from CRF), passed
                                                      through to self.detect_entity_bulk. None is
                                                      treated as an empty list.
            **kwargs: only 'fallback_values' is read; it is indexed by message position.

        Returns:
            The value built by self.output_entity_bulk from the per-message selections. Judging by
            the example below, this is a list holding one list of entity dicts (entity_value,
            original_text, detection) per message.

        Example:
            Restaurant detection on a single-message batch (illustrative output):

                messages = ['i want to order chinese from  mainland china and pizza from domminos']
                output = detector.detect_bulk(messages=messages)
                print(output)
                    >> [[{'detection': 'message', 'original_text': 'mainland china', 'entity_value':
                    {'value': u'Mainland China'}}, {'detection': 'message', 'original_text': 'domminos',
                    'entity_value': {'value': u"Domino's Pizza"}}]]
        """
        if messages is None:
            messages = []

        if self._source_language_script != self._target_language_script and self._translation_enabled:
            # Translate the whole batch first, then keep only the successful translations.
            translations = [
                translate_text(raw_message, self._source_language_script, self._target_language_script)
                for raw_message in messages
            ]
            messages = [
                translation[TRANSLATED_TEXT] if translation['status'] else ''
                for translation in translations
            ]

        # Prior results from entity detection using methods like CRF etc.
        if predetected_values is None:
            predetected_values = []
        entities_list, original_list = self.detect_entity_bulk(
            texts=messages, predetected_values=predetected_values)

        fallback_values = kwargs.get('fallback_values')
        values_list, detection_method_list, original_texts_list = [], [], []

        for position in range(len(messages)):
            if entities_list[position]:
                values = entities_list[position]
                method = FROM_MESSAGE
                originals = original_list[position]
            elif fallback_values and fallback_values[position]:
                values = [fallback_values[position]]
                method = FROM_FALLBACK_VALUE
                originals = [fallback_values[position]]
            else:
                values, method, originals = [], None, []

            values_list.append(values)
            detection_method_list.append(method)
            original_texts_list.append(originals)

        return self.output_entity_bulk(
            entity_values_list=values_list,
            original_texts_list=original_texts_list,
            detection_method_list=detection_method_list,
            detection_language=self._target_language_script)