def get_location(message, entity_name, structured_value, fallback_value, bot_message):
    """Detect a location entity by delegating to TextDetector.

    TODO: A dedicated location detector class could replace the use of
    TextDetector here.

    Args:
        message: text searched when no structured value is supplied.
        entity_name: entity name handed to TextDetector.
        structured_value: when truthy, detection runs on this value and the
            message is ignored entirely.
        fallback_value: returned (wrapped) when nothing is found in the
            message and no structured value was supplied.
        bot_message: accepted for interface compatibility; not used here.

    Returns:
        The result of output_entity_dict_list / output_entity_dict_value for
        the first source that yields a value, or None when there is nothing
        to return.
    """
    detector = TextDetector(entity_name=entity_name)

    if structured_value:
        # A structured value takes precedence: the message is never inspected.
        detected_values, matched_texts = detector.detect_entity(structured_value)
        if not detected_values:
            # Nothing recognised: hand the raw structured value back, unverified.
            return output_entity_dict_value(structured_value, structured_value,
                                            FROM_STRUCTURE_VALUE_NOT_VERIFIED)
        return output_entity_dict_list(detected_values, matched_texts,
                                       FROM_STRUCTURE_VALUE_VERIFIED)

    detected_values, matched_texts = detector.detect_entity(message)
    if detected_values:
        return output_entity_dict_list(detected_values, matched_texts, FROM_MESSAGE)
    if fallback_value:
        return output_entity_dict_value(fallback_value, fallback_value, FROM_FALLBACK_VALUE)
    return None
def __init__(self, entity_name, source_language_script=ENGLISH_LANG, translation_enabled=False):
    """Set up a ShoppingSizeDetector for the given entity.

    Args:
        entity_name: name stored on the instance, used to build the
            ``__<entity_name>__`` tag and passed to the TextDetector.
        source_language_script: language code forwarded to the superclass
            initializer (defaults to ENGLISH_LANG).
        translation_enabled: flag forwarded to the superclass initializer.
    """
    # The supported-language list is assigned before the superclass
    # initializer runs, as in the original ordering.
    self._supported_languages = [ENGLISH_LANG]
    super(ShoppingSizeDetector, self).__init__(source_language_script, translation_enabled)

    # Entity identity and the helper built from it.
    self.entity_name = entity_name
    self.tag = '__' + self.entity_name + '__'
    self.text_detection_object = TextDetector(entity_name=self.entity_name)

    # Per-message working state, all empty until a detection is run.
    self.text = self.tagged_text = self.processed_text = ''
    self.text_dict = {}

    # Detection results.
    self.size = []
    self.original_size_text = []
def _detect_text_budget(self, budget_list=None, original_list=None):
    """Detect budget mentions in ``self.text`` through TextDetector.

    Args:
        budget_list: optional list to append budget dicts to; a new list is
            created when None.
        original_list: optional list to append matched substrings to; a new
            list is created when None.

    Returns:
        A ``(budget_list, original_list)`` tuple. One placeholder budget dict
        (min and max both 0, type BUDGET_TYPE_TEXT) and one matched substring
        are appended per detection.
    """
    budget_list = [] if budget_list is None else budget_list
    original_list = [] if original_list is None else original_list

    detector = TextDetector(entity_name=self.entity_name)
    detected_values, matched_texts = detector.detect_entity(self.text, return_str=True)

    # FIXME: Broken/Ineffective code (note carried over from the original).
    self.tagged_text = detector.tagged_text
    self.processed_text = detector.processed_text

    # The detected value itself is discarded; only the matched text is kept.
    for _, matched_text in zip(detected_values, matched_texts):
        budget_list.append({'min_budget': 0, 'max_budget': 0, 'type': BUDGET_TYPE_TEXT})
        original_list.append(matched_text)

    return budget_list, original_list
def text(request):
    """Handle a text-entity detection request and return the result as JSON.

    Args:
        request: incoming request; its parameters are extracted with
            get_parameters_dictionary.

    Returns:
        HttpResponse with body ``{"data": <detector output>}`` and content
        type application/json on success, or an empty HttpResponse with
        status 500 when a TypeError is raised while processing.
    """
    try:
        params = get_parameters_dictionary(request)
        entity_name = params[PARAMETER_ENTITY_NAME]
        ner_logger.debug('Start: %s ' % entity_name)

        fuzziness = params[PARAMETER_FUZZINESS]
        min_token_len_fuzziness = params[PARAMETER_MIN_TOKEN_LEN_FUZZINESS]

        text_detector = TextDetector(entity_name=entity_name,
                                     source_language_script=params[PARAMETER_LANGUAGE_SCRIPT])
        ner_logger.debug('fuzziness: %s min_token_len_fuzziness %s' % (str(fuzziness),
                                                                         str(min_token_len_fuzziness)))

        # Both tuning knobs are optional; falsy values leave the detector's
        # own settings untouched.
        if fuzziness:
            text_detector.set_fuzziness_threshold(parse_fuzziness_parameter(fuzziness))
        if min_token_len_fuzziness:
            text_detector.set_min_token_size_for_levenshtein(min_size=int(min_token_len_fuzziness))

        entity_output = text_detector.detect(
            message=params[PARAMETER_MESSAGE],
            structured_value=params[PARAMETER_STRUCTURED_VALUE],
            fallback_value=params[PARAMETER_FALLBACK_VALUE],
            bot_message=params[PARAMETER_BOT_MESSAGE],
        )
        ner_logger.debug('Finished %s : %s ' % (entity_name, entity_output))
    except TypeError as e:
        ner_logger.exception('Exception for text_synonym: %s ' % e)
        return HttpResponse(status=500)
    else:
        return HttpResponse(json.dumps({'data': entity_output}), content_type='application/json')
def __init__(self, entity_name):
    """Initialise empty detection state and the TextDetector helper.

    Args:
        entity_name: entity name handed to the underlying TextDetector.
    """
    self.text_detection_object = TextDetector(entity_name=entity_name)

    # Per-message working state, all empty until a detection is run.
    self.text = self.tagged_text = self.processed_text = ''
    self.text_dict = {}

    # Detection results.
    self.location = []
    self.original_location_text = []

    # User location details; unset until populated elsewhere.
    self.user_address = self.user_lat_long = self.user_location_updated_at = None
def __init__(self, entity_name):
    """Set up a NameDetector for the given entity.

    Args:
        entity_name: name stored on the instance and handed to the
            underlying TextDetector.
    """
    self.entity_name = entity_name
    self.text_detection_object = TextDetector(entity_name=entity_name)

    # Per-message working state, all empty until a detection is run.
    self.text = self.tagged_text = self.processed_text = ''

    # Detection results.
    self.names = []
    self.original_name_text = []
def __init__(self, entity_name):
    """Initializes a ShoppingSizeDetector object

    Args:
        entity_name: A string used to build the ``__<entity_name>__`` tag
            stored in ``self.tag``.

    Note:
        The TextDetector is built with the fixed dictionary name
        ``'shopping_size'`` (``self.dictionary_name``), not with
        ``entity_name``.
    """
    self.entity_name = entity_name
    self.dictionary_name = 'shopping_size'
    self.text = ''
    self.text_dict = {}
    self.tagged_text = ''
    self.processed_text = ''
    self.size = []
    self.original_size_text = []
    # Fix: the original passed the bare name `dictionary_name`, which is never
    # bound in this scope; the value lives on the instance.
    self.text_detection_object = TextDetector(entity_name=self.dictionary_name)
    self.tag = '__' + self.entity_name + '__'
def __init__(self, entity_name):
    """Set up a CityDetector for the given entity.

    Args:
        entity_name: name stored on the instance, used to build the
            ``__<entity_name>__`` tag and handed to the TextDetector.
    """
    self.entity_name = entity_name
    self.tag = '__' + self.entity_name + '__'
    self.text_detection_object = TextDetector(entity_name=entity_name)

    # Per-message working state, all empty until a detection is run.
    self.text = self.bot_message = self.tagged_text = self.processed_text = ''

    # Detection results.
    self.city = []
def __init__(self, entity_name, language=lang_constant.ENGLISH_LANG):
    """Set up a CityDetector for the given entity and language.

    Args:
        entity_name: name stored on the instance, used to build the
            ``__<entity_name>__`` tag and handed to the TextDetector.
        language: language code passed to the TextDetector as
            ``source_language_script`` (defaults to English).
    """
    self.entity_name = entity_name
    self.tag = '__' + self.entity_name + '__'
    self.text_detection_object = TextDetector(entity_name=entity_name,
                                              source_language_script=language)

    # Per-message working state, all empty until a detection is run.
    self.text = self.bot_message = self.tagged_text = self.processed_text = ''

    # Detection results.
    self.city = []
def __init__(self, entity_name):
    """Set up a CityAdvanceDetector for the given entity.

    Args:
        entity_name: name stored on the instance, used to build the
            ``__<entity_name>__`` tag and handed to the TextDetector.
    """
    self.entity_name = entity_name
    self.tag = '__' + entity_name + '__'
    self.text_detection_object = TextDetector(entity_name=entity_name)

    # Per-message working state, all empty until a detection is run.
    self.text = self.tagged_text = self.processed_text = ''
    self.bot_message = None

    # Detection results.
    self.city = []
    self.original_city_text = []
def text(request):
    """Handle a text-entity detection request and return the result as JSON.

    Args:
        request: incoming request; its parameters are extracted with
            get_parameters_dictionary.

    Returns:
        HttpResponse with body ``{"data": <detector output>}`` and content
        type application/json on success, or an empty HttpResponse with
        status 400 when a TypeError is raised while processing.
    """
    # Local import so this block does not depend on the file's import list.
    import json

    try:
        parameters_dict = get_parameters_dictionary(request)
        ner_logger.debug('Start: %s ' % parameters_dict[PARAMETER_ENTITY_NAME])
        text_detector = TextDetector(entity_name=parameters_dict[PARAMETER_ENTITY_NAME],
                                     source_language_script=parameters_dict[PARAMETER_LANGUAGE_SCRIPT])
        entity_output = text_detector.detect(message=parameters_dict[PARAMETER_MESSAGE],
                                             structured_value=parameters_dict[PARAMETER_STRUCTURED_VALUE],
                                             fallback_value=parameters_dict[PARAMETER_FALLBACK_VALUE],
                                             bot_message=parameters_dict[PARAMETER_BOT_MESSAGE])
        ner_logger.debug('Finished %s : %s ' % (parameters_dict[PARAMETER_ENTITY_NAME], entity_output))
    # Fix: `except TypeError, e` is Python-2-only syntax; the `as` form is
    # accepted by Python 2.6+ and Python 3.
    except TypeError as e:
        ner_logger.debug('Exception for text_synonym: %s ' % e)
        return HttpResponse(status=400)

    # Fix: the original fell off the end on success and returned None; return
    # the detection result the same way the other `text` view in this file does.
    return HttpResponse(json.dumps({'data': entity_output}), content_type='application/json')
def __init__(self, entity_name, source_language_script=ENGLISH_LANG, translation_enabled=False):
    """Set up a BudgetDetector for the given entity.

    Args:
        entity_name: name stored on the instance, used to build the
            ``__<entity_name>__`` tag and handed to the TextDetector.
        source_language_script: language code forwarded to the superclass
            initializer (defaults to ENGLISH_LANG).
        translation_enabled: flag forwarded to the superclass initializer.
    """
    # The supported-language list is assigned before the superclass
    # initializer runs, as in the original ordering.
    self._supported_languages = [ENGLISH_LANG]
    super(BudgetDetector, self).__init__(source_language_script, translation_enabled)

    self.entity_name = entity_name
    self.min_digit, self.max_digit = 2, 5

    # Per-message working state, all empty until a detection is run.
    self.text = self.tagged_text = self.processed_text = ''

    # Detection results.
    self.budget = []
    self.original_budget_text = []

    self.unit_present_list = ['k', 'l', 'm', 'c', 'h', 'th']

    # (pattern, multiplier) pairs handed to RegexReplace; order is preserved
    # exactly in case RegexReplace applies them sequentially.
    # NOTE(review): 'h' maps to 1000, the same as 'k' and 'th' - confirm intended.
    unit_multiplier_patterns = [
        (r'([\d,.]+)\s*k', 1000),
        (r'([\d,.]+)\s*h', 1000),
        (r'([\d,.]+)\s*th', 1000),
        (r'([\d,.]+)\s*l', 100000),
        (r'([\d,.]+)\s*lacs?', 100000),
        (r'([\d,.]+)\s*lakh?', 100000),
        (r'([\d,.]+)\s*lakhs?', 100000),
        (r'([\d,.]+)\s*m', 1000000),
        (r'([\d,.]+)\s*million', 1000000),
        (r'([\d,.]+)\s*mill?', 1000000),
        (r'([\d,.]+)\s*c', 10000000),
        (r'([\d,.]+)\s*cro?', 10000000),
        (r'([\d,.]+)\s*crore?', 10000000),
        (r'([\d,.]+)\s*crores?', 10000000),
    ]
    self.regex_object = RegexReplace(unit_multiplier_patterns)

    self.tag = '__' + self.entity_name + '__'
    self.text_detection_object = TextDetector(entity_name=entity_name)
def __init__(self, entity_name):
    """Set up a BudgetDetector for the given entity.

    Args:
        entity_name: name stored on the instance and used to build the
            ``__<entity_name>__`` tag. The TextDetector is built with
            ES_BUDGET_LIST instead of this name.
    """
    self.entity_name = entity_name
    self.tag = '__' + self.entity_name + '__'
    self.min_digit, self.max_digit = 2, 5

    # Per-message working state, all empty until a detection is run.
    self.text = self.tagged_text = self.processed_text = ''

    # Detection results.
    self.budget = []
    self.original_budget_text = []

    # Single substitution rule: digits followed by 'k' become digits + '000'.
    thousand_suffix_rules = [(r'(\d+)k', r'\g<1>000')]
    self.regex_object = Regex(thousand_suffix_rules)

    self.text_detection_object = TextDetector(entity_name=ES_BUDGET_LIST)
def get_location(message, entity_name, structured_value, fallback_value, bot_message):
    """Detect a location entity by delegating to TextDetector.

    TODO: A dedicated location detector class could replace the use of
    TextDetector here.

    Args:
        message (str): natural text searched when no structured value is
            supplied.
        entity_name (str): entity name handed to TextDetector.
        structured_value (str): value from a structured element (for example
            a form or payload). When truthy, detection runs on it and the
            message is ignored entirely.
        fallback_value (str): returned (wrapped in single-element lists) when
            nothing is found in the message and no structured value was
            supplied.
        bot_message (str): previous bot/agent message; accepted for interface
            compatibility and not used here.

    Returns:
        The result of output_entity_dict_list for the first source that
        yields a value, or None when there is nothing to return.
    """
    detector = TextDetector(entity_name=entity_name)

    if structured_value:
        # A structured value takes precedence: the message is never inspected.
        detected_values, matched_texts = detector.detect_entity(structured_value)
        if not detected_values:
            # Nothing recognised: hand the raw structured value back, unverified.
            return output_entity_dict_list([structured_value], [structured_value],
                                           FROM_STRUCTURE_VALUE_NOT_VERIFIED)
        return output_entity_dict_list(detected_values, matched_texts,
                                       FROM_STRUCTURE_VALUE_VERIFIED)

    detected_values, matched_texts = detector.detect_entity(message)
    if detected_values:
        return output_entity_dict_list(detected_values, matched_texts, FROM_MESSAGE)
    if fallback_value:
        return output_entity_dict_list([fallback_value], [fallback_value], FROM_FALLBACK_VALUE)
    return None
def get_text(message, entity_name, structured_value, fallback_value, bot_message):
    """Detect textual entities by delegating to TextDetector.

    Args:
        message: text searched when no structured value is supplied.
        entity_name: entity name handed to TextDetector.
        structured_value: when truthy, detection runs on this value and the
            message is ignored entirely, even if nothing is detected in it.
        fallback_value: returned (wrapped) when nothing is found in the
            message and no structured value was supplied.
        bot_message: accepted for interface compatibility; not used here.

    Returns:
        The result of output_entity_dict_list / output_entity_dict_value for
        the first source that yields a value, or None when there is nothing
        to return.

    Example (illustrative; actual values depend on the entity dictionary):
        message = 'i want to order chinese from mainland china and pizza from domminos'
        output = get_text(message=message, entity_name='restaurant', structured_value=None,
                          fallback_value=None, bot_message=None)
        print output
        >> [{'detection': 'message', 'original_text': 'mainland china',
             'entity_value': {'value': u'Mainland China'}},
            {'detection': 'message', 'original_text': 'domminos',
             'entity_value': {'value': u"Domino's Pizza"}}]

        output = get_text(message='i wanted to watch movie', entity_name='movie',
                          structured_value='inferno', fallback_value=None, bot_message=None)
        print output
        >> [{'detection': 'structure_value_verified', 'original_text': 'inferno',
             'entity_value': {'value': u'Inferno'}}]
    """
    detector = TextDetector(entity_name=entity_name)

    if structured_value:
        # A structured value takes precedence: the message is never inspected.
        detected_values, matched_texts = detector.detect_entity(structured_value)
        if not detected_values:
            # Nothing recognised: hand the raw structured value back, unverified.
            return output_entity_dict_value(structured_value, structured_value,
                                            FROM_STRUCTURE_VALUE_NOT_VERIFIED)
        return output_entity_dict_list(detected_values, matched_texts,
                                       FROM_STRUCTURE_VALUE_VERIFIED)

    detected_values, matched_texts = detector.detect_entity(message)
    if detected_values:
        return output_entity_dict_list(detected_values, matched_texts, FROM_MESSAGE)
    if fallback_value:
        return output_entity_dict_value(fallback_value, fallback_value, FROM_FALLBACK_VALUE)
    return None
def get_text(message, entity_name, structured_value, fallback_value, bot_message):
    """Detect textual entities by delegating to TextDetector.

    Args:
        message (str): natural text searched when no structured value is
            supplied.
        entity_name (str): entity name handed to TextDetector.
        structured_value (str): value from a structured element (for example
            a form or payload). When truthy, detection runs on it and the
            message is ignored entirely, even if nothing is detected in it.
        fallback_value (str): returned (wrapped in single-element lists) when
            nothing is found in the message and no structured value was
            supplied.
        bot_message (str): previous bot/agent message; accepted for interface
            compatibility and not used here.

    Returns:
        The result of output_entity_dict_list for the first source that
        yields a value, or None when there is nothing to return.

    Example (illustrative; actual values depend on the entity dictionary):
        message = 'i want to order chinese from mainland china and pizza from domminos'
        output = get_text(message=message, entity_name='restaurant', structured_value=None,
                          fallback_value=None, bot_message=None)
        print output
        >> [{'detection': 'message', 'original_text': 'mainland china',
             'entity_value': {'value': u'Mainland China'}},
            {'detection': 'message', 'original_text': 'domminos',
             'entity_value': {'value': u"Domino's Pizza"}}]

        output = get_text(message='i wanted to watch movie', entity_name='movie',
                          structured_value='inferno', fallback_value=None, bot_message=None)
        print output
        >> [{'detection': 'structure_value_verified', 'original_text': 'inferno',
             'entity_value': {'value': u'Inferno'}}]
    """
    detector = TextDetector(entity_name=entity_name)

    if structured_value:
        # A structured value takes precedence: the message is never inspected.
        detected_values, matched_texts = detector.detect_entity(structured_value)
        if not detected_values:
            # Nothing recognised: hand the raw structured value back, unverified.
            return output_entity_dict_list([structured_value], [structured_value],
                                           FROM_STRUCTURE_VALUE_NOT_VERIFIED)
        return output_entity_dict_list(detected_values, matched_texts,
                                       FROM_STRUCTURE_VALUE_VERIFIED)

    detected_values, matched_texts = detector.detect_entity(message)
    if detected_values:
        return output_entity_dict_list(detected_values, matched_texts, FROM_MESSAGE)
    if fallback_value:
        return output_entity_dict_list([fallback_value], [fallback_value], FROM_FALLBACK_VALUE)
    return None