コード例 #1
0
def get_location(message, entity_name, structured_value, fallback_value, bot_message):
    """Detect a location by running TextDetector on the structured value or the message.

    TODO: We can improve this by creating a separate class for location detection instead of using TextDetector

    Args:
        message: text searched for the entity when no structured value is supplied.
        entity_name: name handed to TextDetector as its ``entity_name``.
        structured_value: when truthy, detection runs on this value and ``message`` is ignored.
        fallback_value: used (tagged FROM_FALLBACK_VALUE) when nothing is detected in ``message``.
        bot_message: accepted for signature compatibility; not used by this function.

    Returns:
        The result of output_entity_dict_list / output_entity_dict_value for the first
        source that yields a value, or None when the message has no match and no
        fallback value is given.
    """
    detector = TextDetector(entity_name=entity_name)

    if structured_value:
        # A structured value always wins; the message is never inspected in this branch.
        detected_values, matched_texts = detector.detect_entity(structured_value)
        if detected_values:
            return output_entity_dict_list(detected_values, matched_texts, FROM_STRUCTURE_VALUE_VERIFIED)
        # Not recognised by the detector: pass the raw value through, flagged as unverified.
        return output_entity_dict_value(structured_value, structured_value, FROM_STRUCTURE_VALUE_NOT_VERIFIED)

    detected_values, matched_texts = detector.detect_entity(message)
    if detected_values:
        return output_entity_dict_list(detected_values, matched_texts, FROM_MESSAGE)
    if fallback_value:
        return output_entity_dict_value(fallback_value, fallback_value, FROM_FALLBACK_VALUE)

    return None
コード例 #2
0
    def __init__(self,
                 entity_name,
                 source_language_script=ENGLISH_LANG,
                 translation_enabled=False):
        """Set up a ShoppingSizeDetector for ``entity_name`` with empty detection state.

        Args:
            entity_name: A string by which the detected numbers would be replaced with on calling
                         detect_entity(); also passed to the internal TextDetector.
            source_language_script: ISO 639 code for language of entities to be detected by the
                                    instance of this class
            translation_enabled: True if messages need to be translated in case the detector does
                                 not support a particular language, else False
        """
        # The supported-language list is assigned before the superclass initializer runs,
        # exactly as in the original ordering.
        self._supported_languages = [ENGLISH_LANG]
        super(ShoppingSizeDetector, self).__init__(source_language_script, translation_enabled)

        # Per-message state: nothing has been processed yet.
        self.text = self.tagged_text = self.processed_text = ''
        self.text_dict = {}
        self.size = []
        self.original_size_text = []

        # Entity identity, the detector built from it, and the '__<entity_name>__' tag.
        self.entity_name = entity_name
        self.text_detection_object = TextDetector(entity_name=self.entity_name)
        self.tag = '__' + self.entity_name + '__'
コード例 #3
0
    def _detect_text_budget(self, budget_list=None, original_list=None):
        """Look up budget mentions in ``self.text`` through TextDetector.

        Every match reported by TextDetector is recorded as a budget dict whose
        ``min_budget`` and ``max_budget`` are both 0 and whose ``type`` is
        BUDGET_TYPE_TEXT; the detected value itself is not stored.

        Args:
            budget_list: optional list that budget dicts are appended to in place;
                         a new list is used when None.
            original_list: optional list that the matched substrings are appended to
                           in place; a new list is used when None.

        Returns:
            A tuple ``(budget_list, original_list)`` holding the budget dicts and their
            corresponding substrings in the original message.

        """
        budget_list = [] if budget_list is None else budget_list
        original_list = [] if original_list is None else original_list

        detector = TextDetector(entity_name=self.entity_name)
        detected_values, matched_substrings = detector.detect_entity(self.text, return_str=True)

        # FIXME: Broken/Ineffective code.
        self.tagged_text = detector.tagged_text
        self.processed_text = detector.processed_text

        # Only the matched substring is used; the detected value is discarded.
        for _ignored_value, substring in zip(detected_values, matched_substrings):
            budget_list.append({'min_budget': 0, 'max_budget': 0, 'type': BUDGET_TYPE_TEXT})
            original_list.append(substring)

        return budget_list, original_list
コード例 #4
0
ファイル: api.py プロジェクト: amansrivastava17/chatbot_ner
def text(request):
    """Handle a text-entity detection request and answer with JSON.

    The request parameters are extracted with get_parameters_dictionary, a TextDetector is
    configured from them (optionally with a fuzziness threshold and a minimum token length
    for Levenshtein matching), and the detection result is serialized.

    Args:
        request: incoming request carrying the url parameters

    Returns:
        HttpResponse with the JSON body ``{'data': entity_output}`` on success, or an
        HttpResponse with status 500 when a TypeError is raised while handling the request.

    """
    try:
        params = get_parameters_dictionary(request)
        entity_name = params[PARAMETER_ENTITY_NAME]
        ner_logger.debug('Start: %s ' % entity_name)

        fuzziness = params[PARAMETER_FUZZINESS]
        min_token_len_fuzziness = params[PARAMETER_MIN_TOKEN_LEN_FUZZINESS]
        detector = TextDetector(entity_name=entity_name,
                                source_language_script=params[PARAMETER_LANGUAGE_SCRIPT])
        ner_logger.debug('fuzziness: %s min_token_len_fuzziness %s' % (str(fuzziness), str(min_token_len_fuzziness)))

        # Both tuning parameters are optional; each is applied only when a truthy value was sent.
        if fuzziness:
            detector.set_fuzziness_threshold(parse_fuzziness_parameter(fuzziness))
        if min_token_len_fuzziness:
            detector.set_min_token_size_for_levenshtein(min_size=int(min_token_len_fuzziness))

        entity_output = detector.detect(message=params[PARAMETER_MESSAGE],
                                        structured_value=params[PARAMETER_STRUCTURED_VALUE],
                                        fallback_value=params[PARAMETER_FALLBACK_VALUE],
                                        bot_message=params[PARAMETER_BOT_MESSAGE])
        ner_logger.debug('Finished %s : %s ' % (entity_name, entity_output))
    except TypeError as e:
        ner_logger.exception('Exception for text_synonym: %s ' % e)
        return HttpResponse(status=500)
    else:
        # Serialization happens outside the guarded section, so its errors are not mapped to 500 here.
        return HttpResponse(json.dumps({'data': entity_output}), content_type='application/json')
コード例 #5
0
 def __init__(self, entity_name):
     """Initialize empty detection state and the TextDetector used for lookups.

     Args:
         entity_name: passed to TextDetector as its ``entity_name``; it is not
                      stored on the instance itself.
     """
     # Message-level state, empty until a message is processed.
     self.text = self.tagged_text = self.processed_text = ''
     self.text_dict = {}

     # Detection results.
     self.location = []
     self.original_location_text = []

     # User-specific location details start out unknown.
     self.user_address = None
     self.user_lat_long = None
     self.user_location_updated_at = None

     self.text_detection_object = TextDetector(entity_name=entity_name)
コード例 #6
0
    def __init__(self, entity_name):
        """
        Create a NameDetector bound to ``entity_name`` with empty detection state.

        Args:
            entity_name: A string by which the detected substrings that correspond to text entities would be
                         replaced with on calling detect_entity(); also passed to the internal TextDetector.
        """
        self.entity_name = entity_name

        # Nothing has been processed yet, so all text fields and result lists start empty.
        self.text = self.tagged_text = self.processed_text = ''
        self.names = []
        self.original_name_text = []

        self.text_detection_object = TextDetector(entity_name=entity_name)
コード例 #7
0
    def __init__(self, entity_name):
        """Initializes a ShoppingSizeDetector object

        Args:
            entity_name: A string by which the detected numbers would be replaced with on calling detect_entity()
        """
        self.entity_name = entity_name
        # Name handed to TextDetector for lookups; fixed for this detector rather than
        # taken from entity_name.
        self.dictionary_name = 'shopping_size'
        self.text = ''
        self.text_dict = {}
        self.tagged_text = ''
        self.processed_text = ''
        self.size = []
        self.original_size_text = []
        # Use the attribute set above: the bare name `dictionary_name` is not defined in
        # this method and would raise NameError.
        self.text_detection_object = TextDetector(entity_name=self.dictionary_name)
        # Placeholder of the form '__<entity_name>__'.
        self.tag = '__' + self.entity_name + '__'
コード例 #8
0
    def __init__(self, entity_name):
        """
        Create a CityDetector for ``entity_name`` with empty detection state.

        Args:
            entity_name: A string by which the detected substrings that correspond to text entities would be
                         replaced with on calling detect_entity(); also passed to the internal TextDetector.
        """
        # All text fields start as empty strings and no city has been detected yet.
        self.text = self.bot_message = self.tagged_text = self.processed_text = ''
        self.city = []

        self.entity_name = entity_name
        self.text_detection_object = TextDetector(entity_name=entity_name)
        # Placeholder of the form '__<entity_name>__'.
        self.tag = '__' + self.entity_name + '__'
コード例 #9
0
    def __init__(self, entity_name, language=lang_constant.ENGLISH_LANG):
        """
        Create a CityDetector for ``entity_name`` that detects text in ``language``.

        Args:
            entity_name: A string by which the detected substrings that correspond to text entities would be
                         replaced with on calling detect_entity(); also passed to the internal TextDetector.
            language: language code of text; forwarded to TextDetector as ``source_language_script``.
        """
        self.entity_name = entity_name

        # Per-message state: empty until a message is processed.
        self.text = self.bot_message = self.tagged_text = self.processed_text = ''
        self.city = []

        self.text_detection_object = TextDetector(entity_name=entity_name,
                                                  source_language_script=language)
        # Placeholder of the form '__<entity_name>__'.
        self.tag = '__' + self.entity_name + '__'
コード例 #10
0
    def __init__(self, entity_name):
        """
        Create a CityAdvanceDetector for ``entity_name`` with empty detection state.

        Args:
            entity_name: A string by which the detected entity substrings would be replaced with on calling
                         detect_entity(); also passed to the internal TextDetector.
        """
        self.entity_name = entity_name

        # Nothing processed yet: empty text fields, no results, no bot message.
        self.text = self.tagged_text = self.processed_text = ''
        self.city = []
        self.original_city_text = []
        self.bot_message = None

        self.text_detection_object = TextDetector(entity_name=entity_name)
        # Placeholder of the form '__<entity_name>__'.
        self.tag = '__' + entity_name + '__'
コード例 #11
0
ファイル: api.py プロジェクト: yangvict/chatbot_ner
def text(request):
    """This functionality initializes text detection functionality to detect textual entities.

    Attributes:
        request: url parameters

    Returns:
        HttpResponse with the JSON body ``{'data': entity_output}`` on success, or an
        HttpResponse with status 400 when a TypeError is raised while handling the request.

    """
    # Imported locally so the success response below does not rely on a module-level import.
    import json

    try:
        parameters_dict = get_parameters_dictionary(request)
        ner_logger.debug('Start: %s ' % parameters_dict[PARAMETER_ENTITY_NAME])
        text_detector = TextDetector(entity_name=parameters_dict[PARAMETER_ENTITY_NAME],
                                     source_language_script=parameters_dict[PARAMETER_LANGUAGE_SCRIPT])
        entity_output = text_detector.detect(message=parameters_dict[PARAMETER_MESSAGE],
                                             structured_value=parameters_dict[PARAMETER_STRUCTURED_VALUE],
                                             fallback_value=parameters_dict[PARAMETER_FALLBACK_VALUE],
                                             bot_message=parameters_dict[PARAMETER_BOT_MESSAGE])
        ner_logger.debug('Finished %s : %s ' % (parameters_dict[PARAMETER_ENTITY_NAME], entity_output))
    except TypeError as e:
        # `except X as e` is valid on Python 2.6+ and Python 3; `except X, e` is Python-2-only.
        ner_logger.debug('Exception for text_synonym: %s ' % e)
        return HttpResponse(status=400)
    # Without this return the success path would hand None back instead of a response.
    return HttpResponse(json.dumps({'data': entity_output}), content_type='application/json')
コード例 #12
0
    def __init__(self,
                 entity_name,
                 source_language_script=ENGLISH_LANG,
                 translation_enabled=False):
        """Set up a BudgetDetector for ``entity_name`` with empty detection state.

        Args:
            entity_name: A string by which the detected budget would be replaced with on calling
                         detect_entity(); also passed to the internal TextDetector.
            source_language_script: forwarded to the superclass initializer.
            translation_enabled: forwarded to the superclass initializer.
        """
        # The supported-language list is assigned before the superclass initializer runs,
        # exactly as in the original ordering.
        self._supported_languages = [ENGLISH_LANG]
        super(BudgetDetector, self).__init__(source_language_script, translation_enabled)

        self.entity_name = entity_name
        # NOTE(review): presumably bounds on the number of digits in a budget - confirm
        # against the detection methods.
        self.min_digit = 2
        self.max_digit = 5

        # Per-message state: nothing has been processed yet.
        self.text = self.tagged_text = self.processed_text = ''
        self.budget = []
        self.original_budget_text = []

        self.unit_present_list = ['k', 'l', 'm', 'c', 'h', 'th']

        # Each entry pairs a "<number><optional spaces><unit>" pattern with the multiplier
        # for that unit. The order of the entries is preserved from the original table.
        thousand, lakh, million, crore = 1000, 100000, 1000000, 10000000
        unit_patterns = [
            (r'([\d,.]+)\s*k', thousand),
            (r'([\d,.]+)\s*h', thousand),
            (r'([\d,.]+)\s*th', thousand),
            (r'([\d,.]+)\s*l', lakh),
            (r'([\d,.]+)\s*lacs?', lakh),
            (r'([\d,.]+)\s*lakh?', lakh),
            (r'([\d,.]+)\s*lakhs?', lakh),
            (r'([\d,.]+)\s*m', million),
            (r'([\d,.]+)\s*million', million),
            (r'([\d,.]+)\s*mill?', million),
            (r'([\d,.]+)\s*c', crore),
            (r'([\d,.]+)\s*cro?', crore),
            (r'([\d,.]+)\s*crore?', crore),
            (r'([\d,.]+)\s*crores?', crore),
        ]
        self.regex_object = RegexReplace(unit_patterns)

        # Placeholder of the form '__<entity_name>__'.
        self.tag = '__' + self.entity_name + '__'
        self.text_detection_object = TextDetector(entity_name=entity_name)
コード例 #13
0
    def __init__(self, entity_name):
        """Set up a BudgetDetector for ``entity_name`` with empty detection state.

        Args:
            entity_name: A string by which the detected budget would be replaced with on calling
                         detect_entity(). The internal TextDetector is built with ES_BUDGET_LIST,
                         not with this name.
        """
        self.entity_name = entity_name
        # NOTE(review): presumably bounds on the number of digits in a budget - confirm
        # against the detection methods.
        self.min_digit = 2
        self.max_digit = 5

        # Per-message state: nothing has been processed yet.
        self.text = self.tagged_text = self.processed_text = ''
        self.budget = []
        self.original_budget_text = []

        # Replacement rule: digits followed by 'k' become the same digits followed by '000'.
        thousand_rules = [(r'(\d+)k', r'\g<1>000')]
        self.regex_object = Regex(thousand_rules)

        # Placeholder of the form '__<entity_name>__'.
        self.tag = '__' + self.entity_name + '__'
        self.text_detection_object = TextDetector(entity_name=ES_BUDGET_LIST)
コード例 #14
0
def get_location(message, entity_name, structured_value, fallback_value, bot_message):
    """Use TextDetector (elasticsearch) to detect location

    TODO: We can improve this by creating a separate class for location detection instead of using TextDetector

    Args:
        message (str): natural text on which detection logic is to be run. Note that if a structured
                       value is given, detection is run on the structured value instead of the message
        entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                           if entity uses elastic-search lookup
        structured_value (str): value obtained from any structured elements (for example, UI elements
                                like form, payload, etc). When truthy it is used instead of the message
        fallback_value (str): value returned as output when nothing is detected in the message
                              (only consulted when no structured value is given)
        bot_message (str): previous message from a bot/agent. Not used by this function.

    Returns:
        The value built by output_entity_dict_list for the first source that yields a result,
        or None when the message has no match and no fallback value is given.
    """
    detector = TextDetector(entity_name=entity_name)

    if structured_value:
        detected_values, matched_texts = detector.detect_entity(structured_value)
        if detected_values:
            detection_source = FROM_STRUCTURE_VALUE_VERIFIED
        else:
            # Not recognised by the detector: pass the raw value through, flagged as unverified.
            detected_values, matched_texts = [structured_value], [structured_value]
            detection_source = FROM_STRUCTURE_VALUE_NOT_VERIFIED
        return output_entity_dict_list(detected_values, matched_texts, detection_source)

    detected_values, matched_texts = detector.detect_entity(message)
    if detected_values:
        return output_entity_dict_list(detected_values, matched_texts, FROM_MESSAGE)
    if fallback_value:
        return output_entity_dict_list([fallback_value], [fallback_value], FROM_FALLBACK_VALUE)

    return None
コード例 #15
0
def get_text(message, entity_name, structured_value, fallback_value, bot_message):
    """Detect textual entities by running TextDetector on the structured value or the message.

    Args:
        message: text searched for the entity when no structured value is supplied.
        entity_name: name handed to TextDetector as its ``entity_name``.
        structured_value: when truthy, detection runs on this value and ``message`` is ignored.
        fallback_value: used (tagged FROM_FALLBACK_VALUE) when nothing is detected in ``message``.
        bot_message: accepted for signature compatibility; not used by this function.

    Returns:
        The result of output_entity_dict_list / output_entity_dict_value for the first
        source that yields a value, or None when the message has no match and no
        fallback value is given.

    Examples (illustrative; the detected values depend on the dictionary data):

        message = 'i want to order chinese from  mainland china and pizza from domminos'
        entity_name = 'restaurant'
        structured_value = None
        fallback_value = None
        bot_message = None
        output = get_text(message=message, entity_name=entity_name, structured_value=structured_value,
                          fallback_value=fallback_value, bot_message=bot_message)
        print(output)

            >> [{'detection': 'message', 'original_text': 'mainland china', 'entity_value':
            {'value': u'Mainland China'}}, {'detection': 'message', 'original_text': 'domminos',
            'entity_value': {'value': u"Domino's Pizza"}}]


        message = 'i wanted to watch movie'
        entity_name = 'movie'
        structured_value = 'inferno'
        fallback_value = None
        bot_message = None
        output = get_text(message=message, entity_name=entity_name, structured_value=structured_value,
                          fallback_value=fallback_value, bot_message=bot_message)
        print(output)

            >> [{'detection': 'structure_value_verified', 'original_text': 'inferno', 'entity_value':
            {'value': u'Inferno'}}]


        With message = 'i wanted to watch inferno', entity_name = 'movie' and
        structured_value = 'delhi', the message is NOT inspected because a structured value
        is present. If 'delhi' is not detected for the entity, the result is built from the raw
        structured value and tagged FROM_STRUCTURE_VALUE_NOT_VERIFIED.

    """
    detector = TextDetector(entity_name=entity_name)

    if structured_value:
        # A structured value always wins; the message is never inspected in this branch.
        detected_values, matched_texts = detector.detect_entity(structured_value)
        if detected_values:
            return output_entity_dict_list(detected_values, matched_texts, FROM_STRUCTURE_VALUE_VERIFIED)
        # Not recognised by the detector: pass the raw value through, flagged as unverified.
        return output_entity_dict_value(structured_value, structured_value, FROM_STRUCTURE_VALUE_NOT_VERIFIED)

    detected_values, matched_texts = detector.detect_entity(message)
    if detected_values:
        return output_entity_dict_list(detected_values, matched_texts, FROM_MESSAGE)
    if fallback_value:
        return output_entity_dict_value(fallback_value, fallback_value, FROM_FALLBACK_VALUE)

    return None
コード例 #16
0
def get_text(message, entity_name, structured_value, fallback_value, bot_message):
    """Use TextDetector (elasticsearch) to detect textual entities

    Args:
        message (str): natural text on which detection logic is to be run. Note that if a structured
                       value is given, detection is run on the structured value instead of the message
        entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                           if entity uses elastic-search lookup
        structured_value (str): value obtained from any structured elements (for example, UI elements
                                like form, payload, etc). When truthy it is used instead of the message
        fallback_value (str): value returned as output when nothing is detected in the message
                              (only consulted when no structured value is given)
        bot_message (str): previous message from a bot/agent. Not used by this function.

    Returns:
        The value built by output_entity_dict_list for the first source that yields a result,
        or None when the message has no match and no fallback value is given.

    Examples (illustrative; the detected values depend on the dictionary data):

        message = 'i want to order chinese from  mainland china and pizza from domminos'
        entity_name = 'restaurant'
        structured_value = None
        fallback_value = None
        bot_message = None
        output = get_text(message=message, entity_name=entity_name, structured_value=structured_value,
                          fallback_value=fallback_value, bot_message=bot_message)
        print(output)

            >> [{'detection': 'message', 'original_text': 'mainland china', 'entity_value':
            {'value': u'Mainland China'}}, {'detection': 'message', 'original_text': 'domminos',
            'entity_value': {'value': u"Domino's Pizza"}}]


        message = 'i wanted to watch movie'
        entity_name = 'movie'
        structured_value = 'inferno'
        fallback_value = None
        bot_message = None
        output = get_text(message=message, entity_name=entity_name, structured_value=structured_value,
                          fallback_value=fallback_value, bot_message=bot_message)
        print(output)

            >> [{'detection': 'structure_value_verified', 'original_text': 'inferno', 'entity_value':
            {'value': u'Inferno'}}]


        With message = 'i wanted to watch inferno', entity_name = 'movie' and
        structured_value = 'delhi', the message is NOT inspected because a structured value
        is present. If 'delhi' is not detected for the entity, the result is built from the raw
        structured value and tagged FROM_STRUCTURE_VALUE_NOT_VERIFIED.

    """
    detector = TextDetector(entity_name=entity_name)

    if structured_value:
        detected_values, matched_texts = detector.detect_entity(structured_value)
        if detected_values:
            detection_source = FROM_STRUCTURE_VALUE_VERIFIED
        else:
            # Not recognised by the detector: pass the raw value through, flagged as unverified.
            detected_values, matched_texts = [structured_value], [structured_value]
            detection_source = FROM_STRUCTURE_VALUE_NOT_VERIFIED
        return output_entity_dict_list(detected_values, matched_texts, detection_source)

    detected_values, matched_texts = detector.detect_entity(message)
    if detected_values:
        return output_entity_dict_list(detected_values, matched_texts, FROM_MESSAGE)
    if fallback_value:
        return output_entity_dict_list([fallback_value], [fallback_value], FROM_FALLBACK_VALUE)

    return None