def __init__(self,
                 entity_name,
                 source_language_script=ENGLISH_LANG,
                 translation_enabled=False):
        """Initializes a PassengerDetector object

        Args:
            entity_name: A string by which the detected passenger count would be replaced with on calling
                        detect_entity()
            source_language_script: ISO 639 code for language of entities to be detected by the instance of this class
            translation_enabled: True if messages needs to be translated in case detector does not support a
                                 particular language, else False
        """
        # assigning values to superclass attributes
        self._supported_languages = [ENGLISH_LANG]
        super(PassengerDetector, self).__init__(source_language_script,
                                                translation_enabled)
        self.text = ''
        self.entity_name = entity_name
        self.tagged_text = ''
        self.processed_text = ''
        self.passenger = []
        self.original_passenger_text = []
        self.tag = '__' + self.entity_name + '__'
        self.bot_message = None
        self.number_detection = NumberDetector('numeric_range')
        self.number_detection.set_min_max_digits(min_digit=1, max_digit=2)
예제 #2
0
def get_number(message, entity_name, structured_value, fallback_value,
               bot_message):
    """This functionality calls the NumberDetector class to detect numerals

    Attributes:
        NOTE: Explained above

    Output:
        NOTE: Explained above

    For Example:

        message = "I want to purchase 30 units of mobile and 40 units of Television"
        entity_name = 'number_of_unit'
        structured_value = None
        fallback_value = None
        bot_message = None
        output = get_number(message=message, entity_name=entity_name, structured_value=structured_value,
                          fallback_value=fallback_value, bot_message=bot_message)
        print output

            >> [{'detection': 'message', 'original_text': '30', 'entity_value': {'value': '30'}},
            {'detection': 'message', 'original_text': '40', 'entity_value': {'value': '40'}}]


        message = "I want to reserve a table for 3 people"
        entity_name = 'number_of_people'
        structured_value = None
        fallback_value = None
        bot_message = None
        output = get_number(message=message, entity_name=entity_name, structured_value=structured_value,
                          fallback_value=fallback_value, bot_message=bot_message)
        print output

            >> [{'detection': 'message', 'original_text': 'for 3 people', 'entity_value': {'value': '3'}}]

    """

    number_detection = NumberDetector(entity_name=entity_name)

    if structured_value:
        entity_list, original_text_list = number_detection.detect_entity(
            text=structured_value)
        if entity_list:
            return output_entity_dict_list(entity_list, original_text_list,
                                           FROM_STRUCTURE_VALUE_VERIFIED)
        else:
            return output_entity_dict_value(structured_value, structured_value,
                                            FROM_STRUCTURE_VALUE_NOT_VERIFIED)
    else:
        entity_list, original_text_list = number_detection.detect_entity(
            text=message)
        if entity_list:
            return output_entity_dict_list(entity_list, original_text_list,
                                           FROM_MESSAGE)
        elif fallback_value:
            return output_entity_dict_value(fallback_value, fallback_value,
                                            FROM_FALLBACK_VALUE)

    return None
예제 #3
0
def get_number(message, entity_name, structured_value, fallback_value, bot_message, min_digit=None, max_digit=None):
    """Use NumberDetector to detect numerals

    Args:
        message (str): natural text on which detection logic is to be run. Note if structured value is
                                detection is run on structured value instead of message
        entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                           if entity uses elastic-search lookup
        structured_value (str): Value obtained from any structured elements. Note if structured value is
                                detection is run on structured value instead of message
                                (For example, UI elements like form, payload, etc)
        fallback_value (str): If the detection logic fails to detect any value either from structured_value
                          or message then we return a fallback_value as an output.
        bot_message (str): previous message from a bot/agent.
        min_digit (str): min digit
        max_digit (str): max digit


    Returns:
        dict or None: dictionary containing entity_value, original_text and detection;
                      entity_value is in itself a dict with its keys varying from entity to entity

    Example:

        message = "I want to purchase 30 units of mobile and 40 units of Television"
        entity_name = 'number_of_unit'
        structured_value = None
        fallback_value = None
        bot_message = None
        output = get_number(message=message, entity_name=entity_name, structured_value=structured_value,
                          fallback_value=fallback_value, bot_message=bot_message)
        print output

            >> [{'detection': 'message', 'original_text': '30', 'entity_value': {'value': '30'}},
            {'detection': 'message', 'original_text': '40', 'entity_value': {'value': '40'}}]


        message = "I want to reserve a table for 3 people"
        entity_name = 'number_of_people'
        structured_value = None
        fallback_value = None
        bot_message = None
        output = get_number(message=message, entity_name=entity_name, structured_value=structured_value,
                          fallback_value=fallback_value, bot_message=bot_message)
        print output

            >> [{'detection': 'message', 'original_text': 'for 3 people', 'entity_value': {'value': '3'}}]

    """

    number_detection = NumberDetector(entity_name=entity_name)
    if min_digit and max_digit:
        min_digit = int(min_digit)
        max_digit = int(max_digit)
        number_detection.set_min_max_digits(min_digit=min_digit, max_digit=max_digit)
    return number_detection.detect(message=message, structured_value=structured_value, fallback_value=fallback_value,
                                   bot_message=bot_message)
class PassengerDetector(BaseDetector):
    """Detects passenger count from the text and tags them.

    Detects all passenger count in given text and replaces them by entity_name

    For Example:

        passenger_detector = PassengerDetector("no_of_adults")
        message = "Can you please help me to book tickets for 3 people"
        passenger_count, original_passenger_string = passenger_detector.detect_entity(message)
        tagged_text = passenger_detector.tagged_text

        print passenger_count, ' -- ', original_passenger_string
        print 'Tagged text: ', tagged_text

         >> ['3']  --  ['3']
            Tagged text:   Can you please help me to book tickets for __no_of_adults__ people

    Attributes:
        text: string to extract entities from
        entity_name: string by which the detected passenger count would be replaced with on calling detect_entity()
        tagged_text: string with passenger count replaced with tag defined by entity name
        processed_text: string with detected passenger count removed
        passenger: list of passenger count detected
        original_passenger_text: list to store substrings of the text detected as passenger count
        tag: entity_name prepended and appended with '__'
        bot_message: str, set as the outgoing bot text/message

    Note:
        text and tagged_text will have a extra space prepended and appended after calling detect_entity(text)
    """
    def __init__(self,
                 entity_name,
                 source_language_script=ENGLISH_LANG,
                 translation_enabled=False):
        """Initializes a PassengerDetector object

        Args:
            entity_name: A string by which the detected passenger count would be replaced with on calling
                        detect_entity()
            source_language_script: ISO 639 code for language of entities to be detected by the instance of this class
            translation_enabled: True if messages needs to be translated in case detector does not support a
                                 particular language, else False
        """
        # assigning values to superclass attributes
        self._supported_languages = [ENGLISH_LANG]
        super(PassengerDetector, self).__init__(source_language_script,
                                                translation_enabled)
        self.text = ''
        self.entity_name = entity_name
        self.tagged_text = ''
        self.processed_text = ''
        self.passenger = []
        self.original_passenger_text = []
        self.tag = '__' + self.entity_name + '__'
        self.bot_message = None
        self.number_detection = NumberDetector('numeric_range')
        self.number_detection.set_min_max_digits(min_digit=1, max_digit=2)

    @property
    def supported_languages(self):
        return self._supported_languages

    def detect_entity(self, text, **kwargs):
        """
        Detects passenger count in the text string

        Args:
            text (str): string to extract entities from
            **kwargs: it can be used to send specific arguments in future.

        Returns:
            (passenger_list, original_list) (tuple)
            passenger_list (list): a list consisting of passenger count obtained from text
            original_list (list): a list consisting of corresponding substrings of detected entities in the given text

            For example:

                (['3'], ['3'])
        """
        self.text = ' ' + text + ' '
        self.processed_text = self.text.lower()
        self.tagged_text = self.text
        passenger_data = self._detect_passenger_count()
        self.passenger = passenger_data[0]
        self.original_passenger_text = passenger_data[1]
        return passenger_data

    def _detect_passenger_count(self):
        """
        Detects passenger count from text

        Returns:
            (passenger_list, original_list) (tuple)
            passenger_list (list): a list consisting of passenger count obtained from text
            original_list (list): a list consisting of corresponding substrings of detected entities in the given text
        """
        original_list = []
        passenger_list = []
        if self.entity_name == 'no_of_adults':
            passenger_list, original_list = self._detect_adult_count()
            self._update_processed_text(original_list)
        elif self.entity_name == 'no_of_childs':
            passenger_list, original_list = self._detect_child_count()
            self._update_processed_text(original_list)
        elif self.entity_name == 'no_of_infants':
            passenger_list, original_list = self._detect_infant_count()
            self._update_processed_text(original_list)
        return passenger_list, original_list

    def _detect_adult_count(self):
        """
        Detects adult count from text

        Returns:
            (no_of_adults, original_list) (tuple)
            no_of_adults (list): a list consisting of no_of_adults obtained from text
            original_list (list): a list consisting of corresponding substrings of detected entities in the given text
        """
        no_of_adults = []
        original_list = []
        regex_adult = re.compile(
            r'((\w*\s*\w+)\s*(adult|people|passenger|log|person|ppl|'
            r'traveller))')
        patterns = regex_adult.findall(self.processed_text)
        if not patterns and self.bot_message:
            adult_regex = re.compile(
                r'((number|no|no.|how many)\W*(of)?\W*(adult|passenger|people|person|ppl| '
                r'traveller))')
            if adult_regex.search(self.bot_message) is not None:
                patterns = re.findall(r'([\w]+)', self.processed_text)

                for pattern in patterns:
                    number_list, original_number_list = self.number_detection.detect_entity(
                        pattern.strip())
                    if number_list:
                        no_of_adults.append(number_list[0])
                        original_list.append(original_number_list[0])
        elif patterns:
            for pattern in patterns:
                number_list, original_number_list = self.number_detection.detect_entity(
                    pattern[1].strip())
                if number_list:
                    no_of_adults.append(number_list[0])
                    original_list.append(original_number_list[0])
        return no_of_adults, original_list

    def _detect_child_count(self):
        """
        Detects children count from text

        Returns:
            (no_of_childs, original_list) (tuple)
            no_of_childs (list): a list consisting of no_of_childs obtained from text
            original_list (list): a list consisting of corresponding substrings of detected entities in the given text
        """
        no_of_childs = []
        original_list = []
        regex_child = re.compile(r'((\w*\s*\w+)\s*(child|children|kid))')
        patterns = regex_child.findall(self.processed_text)
        if not patterns and self.bot_message:
            child_regex = re.compile(
                r'((number|no|no.|how many)\W*(of)?\W*(child|children|kid))')
            if child_regex.search(self.bot_message) is not None:
                patterns = re.findall(r'([\w]+)', self.processed_text)
                for pattern in patterns:
                    number_list, original_number_list = self.number_detection.detect_entity(
                        pattern.strip())
                    if number_list:
                        no_of_childs.append(number_list[0])
                        original_list.append(original_number_list[0])
        elif patterns:
            for pattern in patterns:
                number_list, original_number_list = self.number_detection.detect_entity(
                    pattern[1].strip())
                if number_list:
                    no_of_childs.append(number_list[0])
                    original_list.append(original_number_list[0])
        return no_of_childs, original_list

    def _detect_infant_count(self):
        """
        Detects infant count from text

        Returns:
            (no_of_infants, original_list) (tuple)
            no_of_infants (list): a list consisting of no_of_infants obtained from text
            original_list (list): a list consisting of corresponding substrings of detected entities in the given text
        """
        no_of_infants = []
        original_list = []
        regex_infant = re.compile(r'((\w*\s*\w+)\s*(infant|bachcha))')
        patterns = regex_infant.findall(self.processed_text)
        if not patterns and self.bot_message:
            infant_regex = re.compile(
                r'((number|no|no.|how many)\W*(of)?\W*(infant|baby))')
            if infant_regex.search(self.bot_message) is not None:
                patterns = re.findall(r'([\w]+)', self.processed_text)
                for pattern in patterns:
                    number_list, original_number_list = self.number_detection.detect_entity(
                        pattern.strip())
                    if number_list:
                        no_of_infants.append(number_list[0])
                        original_list.append(original_number_list[0])
        elif patterns:
            for pattern in patterns:
                number_list, original_number_list = self.number_detection.detect_entity(
                    pattern[1].strip())
                if number_list:
                    no_of_infants.append(number_list[0])
                    original_list.append(original_number_list[0])
        return no_of_infants, original_list

    def _update_processed_text(self, original_passenger_strings):
        """
        Replaces detected passenger count with self.tag generated from entity_name used to initialize the object with

        A final string with all passenger count replaced will be stored in self.tagged_text attribute
        A string with all passenger count removed will be stored in self.processed_text attribute

        Args:
            original_passenger_strings: list of substrings of original text to be replaced with self.tag
        """
        for detected_text in original_passenger_strings:
            self.tagged_text = self.tagged_text.replace(
                detected_text, self.tag)
            self.processed_text = self.processed_text.replace(
                detected_text, '')

    def set_bot_message(self, bot_message):
        """
        Sets the object's bot_message attribute

        Args:
            bot_message (str): previous message that is sent by the bot
        """
        self.bot_message = bot_message