def run_test(self):
    """
    Run number detection for a single test case and verify the detected values.

    Reads ``testcase``, ``language`` and ``failure_string_prefix`` from outside this
    function. The detected (number dict, span) pairs must be as many as the expected
    outputs, and every detected pair must appear among the expected outputs.
    """
    message = testcase["message"]
    detector = NumberDetector(entity_name="number",
                              language=language,
                              unit_type=testcase.get("unit_type", None))
    # Fall back to the detector's own digit limits when the test case does not override them
    detector.set_min_max_digits(
        min_digit=testcase.get('min_digit', detector.min_digit),
        max_digit=testcase.get('max_digit', detector.max_digit))
    number_dicts, spans = detector.detect_entity(message)

    expected_number_dicts, expected_spans = parse_expected_outputs(testcase["outputs"])
    expected_outputs = list(six.moves.zip(expected_number_dicts, expected_spans))

    prefix = failure_string_prefix.format(message=message, language=language)

    self.assertEqual(
        len(number_dicts), len(spans),
        prefix + u"Returned numbers and original_texts have different lengths")
    self.assertEqual(
        len(spans), len(expected_outputs),
        prefix + u"Returned numbers and expected_outputs have different lengths")

    for detected_pair in six.moves.zip(number_dicts, spans):
        failure_note = prefix + u"{got} not in {expected_outputs}".format(
            got=detected_pair, expected_outputs=expected_outputs)
        self.assertIn(detected_pair, expected_outputs, failure_note)
def test_en_number_detection_for_integer_number_with_unit(self):
    """
    Number detection for english language for integer number with units like 'Rs100', '2Rs'
    """
    message = u'rs.100 is the application charger'
    number_detector_object = NumberDetector(entity_name=self.entity_name, language='en')
    number_dicts, original_texts = number_detector_object.detect_entity(message)
    # zip() is a lazy iterator on Python 3; materialise it so len() and repeated lookups work
    zipped = list(zip(number_dicts, original_texts))
    self.assertEqual(len(zipped), 1)
    self.assertIn(({'value': '100', 'unit': 'rupees'}, 'rs.100'), zipped)
def test_en_number_detection_for_integer_number(self):
    """
    Number detection for english language for integer number like '100', '2'
    """
    message = u'100 got selected for interview'
    number_detector_object = NumberDetector(entity_name=self.entity_name, language='en')
    number_dicts, original_texts = number_detector_object.detect_entity(message)
    # zip() is a lazy iterator on Python 3; materialise it so len() and repeated lookups work
    zipped = list(zip(number_dicts, original_texts))
    self.assertEqual(len(zipped), 1)
    self.assertIn(({'value': '100', 'unit': None}, u'100'), zipped)
def test_en_number_detection_for_decimal_number_with_scale_and_unit(self):
    """
    English number detection for a scaled decimal number ('1.2 thousand', '2.2k')
    that is followed by a unit; the expected result excludes the unit.
    """
    detector = NumberDetector(entity_name=self.entity_name, language='en')
    detected_values, detected_texts = detector.detect_entity('I bought a car toy for 2.3k rupees')
    detected_pairs = [pair for pair in zip(detected_values, detected_texts)]

    expected_pair = ({'value': '2300', 'unit': None}, u'2.3k')
    self.assertEqual(len(detected_pairs), 1)
    self.assertIn(expected_pair, detected_pairs)
def test_en_number_detection_for_decimal_number_with_scale(self):
    """
    Number detection for english language for decimal number with scale like '1.2 thousand', '2.2k', '1.4m'
    """
    message = 'my monthly salary is 2.2k'
    number_detector_object = NumberDetector(entity_name=self.entity_name, language='en')
    number_dicts, original_texts = number_detector_object.detect_entity(message)
    # zip() is a lazy iterator on Python 3; materialise it so len() and repeated lookups work
    zipped = list(zip(number_dicts, original_texts))
    self.assertEqual(len(zipped), 1)
    self.assertIn(({'value': '2200', 'unit': None}, u'2.2k'), zipped)
def test_en_number_detection_for_integer_number_with_scale(self):
    """
    Number detection for english language for integer number with scale like '1 thousand', '1k', '1m'
    """
    message = '1 thousand men were killed in war'
    number_detector_object = NumberDetector(entity_name=self.entity_name, language='en')
    number_dicts, original_texts = number_detector_object.detect_entity(message)
    # zip() is a lazy iterator on Python 3; materialise it so len() and repeated lookups work
    zipped = list(zip(number_dicts, original_texts))
    self.assertEqual(len(zipped), 1)
    self.assertIn(({'value': '1000', 'unit': None}, u'1 thousand'), zipped)
def test_en_number_detection_for_decimal_number(self):
    """
    Number detection for english language for decimal number like '100.2'
    """
    message = u'Todays temperature is 11.2 degree celsius'
    number_detector_object = NumberDetector(entity_name=self.entity_name, language='en')
    number_dicts, original_texts = number_detector_object.detect_entity(message)
    # zip() is a lazy iterator on Python 3; materialise it so len() and repeated lookups work
    zipped = list(zip(number_dicts, original_texts))
    self.assertEqual(len(zipped), 1)
    self.assertIn(({'value': '11.2', 'unit': None}, u'11.2'), zipped)
def __init__(self, entity_name, language, data_directory_path, unit_type=None):
    """
    Set up a number range detector that reads its range keywords from the language data folder.

    Args:
        entity_name (str): string by which the detected number would be replaced
        language (str): language code of text
        data_directory_path (str): path of data folder for given language
        unit_type (str, optional): number unit type such as weight, currency or temperature;
            passed on to the underlying NumberDetector. If None, number ranges are detected
            irrespective of units. Supported unit types are listed in the number detection
            language data file unit.csv.
    """
    # Identity of this detector
    self.entity_name = entity_name
    self.tag = '__' + entity_name + '__'
    self.language = language
    self.unit_type = unit_type

    # Text state, overwritten on every detection run
    self.text = self.tagged_text = self.processed_text = ''
    self.number_detected_map = {}

    # Range keyword variants, filled in by _init_regex_for_range below
    self.range_variants_map = {}
    self.min_range_prefix_variants = None
    self.min_range_suffix_variants = None
    self.max_range_prefix_variants = None
    self.max_range_suffix_variants = None
    self.min_max_range_variants = None

    number_detector = NumberDetector(entity_name=entity_name,
                                     language=language,
                                     unit_type=unit_type,
                                     detect_without_unit=True)
    self.number_detector = number_detector
    number_detector.set_min_max_digits(1, 100)

    # Load the range keyword variants from the data directory
    self._init_regex_for_range(data_directory_path)

    # Default order in which the individual detectors are run
    self.detector_preferences = [
        self._detect_min_max_num_range,
        self._detect_min_num_range_with_prefix_variants,
        self._detect_min_num_range_with_suffix_variants,
        self._detect_max_num_range_with_prefix_variants,
        self._detect_max_num_range_with_suffix_variants,
        self._detect_absolute_number,
    ]
def test_en_number_detection_for_decimal_number_with_scale_and_unit_and_different_unit_type_given(self):
    """
    English number detection restricted to the 'currency' unit type on a message whose
    number carries a different unit ('kg'); nothing is expected to be detected.
    """
    currency_detector = NumberDetector(entity_name=self.entity_name,
                                       language='en',
                                       unit_type='currency')
    detected_values, detected_texts = currency_detector.detect_entity('I buys 2.3k kg mango')
    detected_pairs = [pair for pair in zip(detected_values, detected_texts)]
    self.assertEqual(len(detected_pairs), 0)
def test_en_number_detection_for_decimal_number_with_unit(self):
    """
    Number detection for english language for decimal number with unit like '10.2k rupees'
    """
    message = u'my monthly salary is 10.12k rupees'
    number_detector_object = NumberDetector(entity_name=self.entity_name, language='en')
    number_dicts, original_texts = number_detector_object.detect_entity(message)
    # zip() is a lazy iterator on Python 3; materialise it so len() and repeated lookups work
    zipped = list(zip(number_dicts, original_texts))
    self.assertEqual(len(zipped), 1)
    self.assertIn(({'value': '10120', 'unit': 'rupees'}, u'10.12k rupees'), zipped)
def test_en_number_detection_for_integer_number_with_scale_and_unit(self):
    """
    Number detection for english language for integer number with scale and unit like 'Rs 1 thousand', '1k Rs'
    """
    message = 'i need 1 thousand rupees'
    number_detector_object = NumberDetector(entity_name=self.entity_name, language='en')
    number_dicts, original_texts = number_detector_object.detect_entity(message)
    # zip() is a lazy iterator on Python 3; materialise it so len() and repeated lookups work
    zipped = list(zip(number_dicts, original_texts))
    self.assertEqual(len(zipped), 1)
    self.assertIn(({'value': '1000', 'unit': 'rupees'}, u'1 thousand rupees'), zipped)
def test_en_number_detection_for_decimal_number_with_scale_and_unit_and_unit_type_given(self):
    """
    Number detection for english language for decimal number with scale like '1.2 thousand rupees', 'Rupees 2.2k'
    when the detector is restricted to the matching unit type ('currency')
    """
    message = 'I bought a car toy for 2.3k rupees'
    number_detector_object = NumberDetector(entity_name=self.entity_name, language='en',
                                            unit_type='currency')
    number_dicts, original_texts = number_detector_object.detect_entity(message)
    # zip() is a lazy iterator on Python 3; materialise it so len() and repeated lookups work
    zipped = list(zip(number_dicts, original_texts))
    self.assertEqual(len(zipped), 1)
    self.assertIn(({'value': '2300', 'unit': 'rupees'}, u'2.3k rupees'), zipped)
def resolve_numerals(text, language) -> str:
    """
    Replace numeric occurrences in text with the values found by NumberDetector.

    Detection is run twice on the same text: once with the detector as constructed for
    ``language`` and once passing ``language='hi'``. Every detected original text is then
    substituted (case-insensitively) with its detected value.

    Args:
        text (str): processed string with numerals and character constants fixed
        language (str): Language for NumberDetector

    Returns:
        processed_text (str): modified text
    """
    number_detector = NumberDetector('asr_dummy', language=language)
    # FIXME: Detection fails if text starts with '0' since number detector discards it
    numerals, matched_texts = number_detector.detect_entity(text=text)
    numerals_hi, matched_texts_hi = number_detector.detect_entity(text=text, language='hi')

    all_numerals = list(numerals) + list(numerals_hi)
    all_matched_texts = list(matched_texts) + list(matched_texts_hi)

    processed_text = text
    for number, original_text in zip(all_numerals, all_matched_texts):
        replacement = number[NUMBER_DETECTION_RETURN_DICT_VALUE]
        processed_text = re.compile(re.escape(original_text), re.IGNORECASE).sub(
            replacement, processed_text)
    return processed_text
def __init__(self, entity_name, language=ENGLISH_LANG, locale=None):
    """
    Set up the phone detector state for the given entity, language and locale.

    Args:
        entity_name (str): A string by which the detected numbers would be replaced with
            on calling detect_entity()
        language (str, optional): language code of number text, defaults to 'en'
        locale (str, optional): locale of the country from which you are dialing, e.g. 'en-IN'.
            Falls back to 'en-IN' when not given.
    """
    # Supported languages are taken from NumberDetector and set before the base initialiser runs
    self._supported_languages = NumberDetector.get_supported_languages()
    super(PhoneDetector, self).__init__(language, locale)
    self.language = language
    self.locale = locale if locale else 'en-IN'
    self.text = ''
    self.phone = []
    self.original_phone_text = []
    self.country_code = self.get_country_code_from_locale()
    self.entity_name = entity_name
    self.tag = '__' + self.entity_name + '__'
def __init__(self, entity_name, language=ENGLISH_LANG):
    """
    Set up the phone detector state for the given entity and language.

    Args:
        entity_name (str): A string by which the detected numbers would be replaced with
            on calling detect_entity()
        language (str, optional): language code of number text, defaults to 'en'
    """
    # Supported languages are taken from NumberDetector and set before the base initialiser runs
    self._supported_languages = NumberDetector.get_supported_languages()
    super(PhoneDetector, self).__init__(language)
    self.language = language
    self.entity_name = entity_name
    # Text state, all empty until a detection is run
    self.text = self.tagged_text = self.processed_text = ''
    self.phone, self.original_phone_text = [], []
    self.tag = '__' + self.entity_name + '__'
def number(request):
    """Use NumberDetector to detect numerals

    Attributes:
        request: url parameters:

    request params:
        message (str): natural text on which detection logic is to be run. Note if structured value is
                       present, detection is run on structured value instead of message
        entity_name (str): name of the entity. Also acts as elastic-search dictionary name
                           if entity uses elastic-search lookup
        structured_value (str): Value obtained from any structured elements. Note if structured value is
                                present, detection is run on structured value instead of message
                                (For example, UI elements like form, payload, etc)
        fallback_value (str): If the detection logic fails to detect any value either from structured_value
                              or message then we return a fallback_value as an output.
        bot_message (str): previous message from a bot/agent.
        unit_type (str): restrict number range to detect for some unit types like 'currency', 'temperature'
        min_digit (str): min digit
        max_digit (str): max digit
                         (the digit limits are applied only when both min_digit and max_digit are given)

    Returns:
        HttpResponse: JSON response of the form {'data': entity_output}, where entity_output is the result
                      of NumberDetector.detect(); a response with status 500 if a TypeError or ValueError
                      (for example a non-numeric min_digit/max_digit) is raised while handling the request.

    Example:
        message = "I want to purchase 30 units of mobile and 40 units of Television"
        entity_name = 'number_of_unit'
        structured_value = None
        fallback_value = None
        bot_message = None
        unit_type = None
        min_digit = 1
        max_digit = 2
        output = number(request)
        print(output)
            >> [{'detection': 'message', 'original_text': '30', 'entity_value': {'value': '30', 'unit': None}},
                {'detection': 'message', 'original_text': '40', 'entity_value': {'value': '40', 'unit': None}}]

        message = "I want to reserve a table for 3 people"
        entity_name = 'number_of_people'
        structured_value = None
        fallback_value = None
        bot_message = None
        unit_type = None
        min_digit = 1
        max_digit = 6
        output = number(request)
        print(output)
            >> [{'detection': 'message', 'original_text': 'for 3 people',
                 'entity_value': {'value': '3', 'unit': 'people'}}]
    """
    try:
        parameters_dict = get_parameters_dictionary(request)
        ner_logger.debug('Start: %s ' % parameters_dict[PARAMETER_ENTITY_NAME])

        number_detection = NumberDetector(entity_name=parameters_dict[PARAMETER_ENTITY_NAME],
                                          language=parameters_dict[PARAMETER_SOURCE_LANGUAGE],
                                          unit_type=parameters_dict[PARAMETER_NUMBER_UNIT_TYPE])

        # Digit limits are only overridden when the request supplies both bounds
        if parameters_dict[PARAMETER_MIN_DIGITS] and parameters_dict[PARAMETER_MAX_DIGITS]:
            min_digit = int(parameters_dict[PARAMETER_MIN_DIGITS])
            max_digit = int(parameters_dict[PARAMETER_MAX_DIGITS])
            number_detection.set_min_max_digits(min_digit=min_digit, max_digit=max_digit)

        entity_output = number_detection.detect(message=parameters_dict[PARAMETER_MESSAGE],
                                                structured_value=parameters_dict[PARAMETER_STRUCTURED_VALUE],
                                                fallback_value=parameters_dict[PARAMETER_FALLBACK_VALUE],
                                                bot_message=parameters_dict[PARAMETER_BOT_MESSAGE])
        ner_logger.debug('Finished %s : %s ' % (parameters_dict[PARAMETER_ENTITY_NAME], entity_output))
    except (TypeError, ValueError) as e:
        # ValueError covers int() being given a non-numeric min_digit/max_digit, which previously
        # escaped this handler and skipped the logging below
        ner_logger.exception('Exception for numeric: %s ' % e)
        return HttpResponse(status=500)

    return HttpResponse(json.dumps({'data': entity_output}), content_type='application/json')
class BaseNumberRangeDetector(object):
    """
    Detect number ranges (min value, max value or both) in text.

    Numbers are first detected with NumberDetector and replaced by tags
    (``numeral_constant.NUMBER_REPLACE_TEXT`` followed by an index). Range keywords read from
    the language data folder are then matched around those tags.
    """

    # Regex templates shared by the detectors. They are escaped ``u''`` literals rather than
    # ``ur''`` literals because the ``ur`` prefix is a syntax error on Python 3.
    # Group 1 is always the full match; the remaining groups are number tags.
    _PREFIX_RANGE_TEMPLATE = u'((?:{choices})\\s+({number}\\d+))'
    _SUFFIX_RANGE_TEMPLATE = u'(({number}\\d+)\\s+(?:{choices}))'
    _MIN_MAX_RANGE_TEMPLATE = u'(({number}\\d+)\\s*(?:{choices})\\s*({number}\\d+))'

    def __init__(self, entity_name, language, data_directory_path, unit_type=None):
        """
        Standard Number detection class, read data from language data path and help to detect
        number ranges like min and max value from given number range text for given languages.

        Args:
            entity_name (str): entity_name: string by which the detected number would be replaced
            language (str): language code of text
            data_directory_path (str): path of data folder for given language
            unit_type (str, optional): number unit types like weight, currency, temperature, used to
                                       detect number with specific unit type only. If None, it will
                                       detect all number ranges irrespective of units. You can see all
                                       unit types supported inside number detection language data with
                                       filename unit.csv.
        """
        self.text = ''
        self.tagged_text = ''
        self.processed_text = ''
        self.entity_name = entity_name
        self.tag = '__' + entity_name + '__'
        self.range_variants_map = {}
        self.unit_type = unit_type

        self.min_range_prefix_variants = None
        self.min_range_suffix_variants = None
        self.max_range_prefix_variants = None
        self.max_range_suffix_variants = None
        self.min_max_range_variants = None
        self.number_detected_map = {}

        # The number detector itself is not restricted by unit type; the unit filter is
        # applied later in _get_number_range.
        self.number_detector = NumberDetector(entity_name=entity_name, language=language)
        self.number_detector.set_min_max_digits(1, 100)

        # Method to initialise regex params
        self._init_regex_for_range(data_directory_path)

        # Variable to define default order in which detector will work
        self.detector_preferences = [
            self._detect_min_max_num_range,
            self._detect_min_num_range_with_prefix_variants,
            self._detect_min_num_range_with_suffix_variants,
            self._detect_max_num_range_with_prefix_variants,
            self._detect_max_num_range_with_suffix_variants
        ]

    def _init_regex_for_range(self, data_directory_path):
        """
        Initialise params which hold variants of keywords defining whether a given number range
        in text contains min value, max value or both.

        Params:
            min_range_prefix_variants (list): List of keywords which occur before min value in text
            min_range_suffix_variants (list): List of keywords which occur after min value in text
            max_range_prefix_variants (list): List of keywords which occur before max value in text
            max_range_suffix_variants (list): List of keywords which occur after max value in text
            min_max_range_variants (list): List of keywords which occur in between min and max value in text

        Args:
            data_directory_path (str): Data directory path

        Returns:
            None
        """
        number_range_df = pd.read_csv(
            os.path.join(data_directory_path, numeral_constant.NUMBER_RANGE_KEYWORD_FILE_NAME),
            encoding='utf-8')
        for _, row in number_range_df.iterrows():
            range_variants = get_list_from_pipe_sep_string(
                row[numeral_constant.COLUMN_NUMBER_RANGE_VARIANTS])
            for variant in range_variants:
                self.range_variants_map[variant] = NumberRangeVariant(
                    position=row[numeral_constant.COLUMN_NUMBER_RANGE_POSITION],
                    range_type=row[numeral_constant.COLUMN_NUMBER_RANGE_RANGE_TYPE])

        # position: -1 => keyword before the number, 1 => after the number, 0 => between two numbers
        self.min_range_prefix_variants = self._get_escaped_variants(
            -1, numeral_constant.NUMBER_RANGE_MIN_TYPE)
        self.min_range_suffix_variants = self._get_escaped_variants(
            1, numeral_constant.NUMBER_RANGE_MIN_TYPE)
        self.max_range_prefix_variants = self._get_escaped_variants(
            -1, numeral_constant.NUMBER_RANGE_MAX_TYPE)
        self.max_range_suffix_variants = self._get_escaped_variants(
            1, numeral_constant.NUMBER_RANGE_MAX_TYPE)
        self.min_max_range_variants = self._get_escaped_variants(
            0, numeral_constant.NUMBER_RANGE_MIN_MAX_TYPE)

    def _get_escaped_variants(self, position, range_type):
        """Return the regex-escaped keyword variants having the given position and range type."""
        return [
            re.escape(variant)
            for variant, value in self.range_variants_map.items()
            if value.position == position and value.range_type == range_type
        ]

    def _tag_number_in_text(self, processed_text):
        """
        replace number in text with number tag from number_detected_map

        Each tag replaces one occurrence of its original text, longer original texts taking
        priority over shorter ones. The replacement is done in a single pass over the text so
        that a tag which has already been inserted (and which ends in digits) can never be
        matched by the original text of another number.

        Args:
            processed_text (str): processed text
        Returns:
            (str): text with number replaced with tag
        Examples:
            >>> text = 'i want to buy 3 apples and more than two bananas'
            >>> number_detected_map = {'__number__0': ({'value': '2', 'unit': None}, 'two'),
                                       '__number__1': ({'value': '3', 'unit': None}, '3')}
            >>> self._tag_number_in_text(text)
            i want to buy __number__1 apples and more than __number__0 bananas
        """
        sorted_number_detected_map = sorted(self.number_detected_map.items(),
                                            key=lambda kv: len(kv[1].original_text),
                                            reverse=True)
        # original text -> tags still waiting to be placed, in priority order
        pending_tags = {}
        for number_tag, value_text_pair in sorted_number_detected_map:
            if value_text_pair.original_text:
                pending_tags.setdefault(value_text_pair.original_text, []).append(number_tag)
        if not pending_tags:
            return processed_text

        # Longest alternative first so a short number text cannot cut a longer one in half
        alternatives = sorted(pending_tags, key=len, reverse=True)
        number_text_pattern = re.compile('|'.join(re.escape(text) for text in alternatives),
                                         re.UNICODE)

        def _place_tag(match):
            waiting = pending_tags[match.group(0)]
            # More occurrences than detected numbers: leave the extra occurrences untouched
            return waiting.pop(0) if waiting else match.group(0)

        return number_text_pattern.sub(_place_tag, processed_text)

    def _get_number_tag_dict(self):
        """
        Method to create number tag dict. It runs number detection on processed_text and creates a
        dict having number tag as key and value as pair of entity value and original text.

        Returns:
            (dict): dict containing number tag and their corresponding value and original text
        Examples:
            >>> text = 'I want 12 dozen banana'
            >>> self._get_number_tag_dict()
            {'__number_1': ({'value': 12, 'unit': None}, '12')}
        """
        detected_number_dict = {}
        entity_value_list, original_text_list = self.number_detector.detect_entity(self.processed_text)
        for index, (entity_value, original_text) in enumerate(zip(entity_value_list, original_text_list)):
            detected_number_dict[numeral_constant.NUMBER_REPLACE_TEXT + str(index)] = ValueTextPair(
                entity_value=entity_value, original_text=original_text)
        return detected_number_dict

    def _get_original_text_from_tagged_text(self, number_tag_text):
        """
        Return original text value of number tag from number detected map

        Args:
            number_tag_text (str): tagged number
        Returns:
            (str or None): Original value of tagged number if found else None
        """
        original = number_tag_text
        # Expand longer tags first: a tag such as '__number__1' is a prefix of '__number__10'
        # and would otherwise be expanded inside it.
        for number_tag in sorted(self.number_detected_map, key=len, reverse=True):
            original = original.replace(number_tag, self.number_detected_map[number_tag].original_text)
        if original == number_tag_text:
            return None
        return original

    def detect_number_range(self, text):
        """
        Detect number-range from number range text. Run through list of detectors defined in
        detector_preferences in the preferences.

        Args:
            text(str): text string
        Returns:
            (tuple): a tuple containing
                (list): list containing detected number ranges
                (list): list containing original text of each detected range
        """
        self.text = text
        self.tagged_text = text
        self.processed_text = text
        self.number_detected_map = self._get_number_tag_dict()
        self.processed_text = self._tag_number_in_text(text)

        number_list, original_list = None, None
        for detector in self.detector_preferences:
            number_list, original_list = detector(number_list, original_list)
            self._update_tagged_text(original_list)
        return number_list, original_list

    def _get_number_range(self, min_part_match, max_part_match, full_match):
        """
        Build the number range and its original text for one regex match by looking up the
        entity values of the number tags in number_detected_map. On success the matched text is
        removed from processed_text so later detectors do not match it again.

        Args:
            min_part_match (str or None): tagged min number
            max_part_match (str or None): tagged max number
            full_match (str): text matching regex
        Returns:
            (tuple): a tuple containing
                (dict or None): detected number range with min value, max value and unit
                (str or None): original text of the detected range
        """
        number_range = None
        original_text = None

        # The match may already have been consumed by an earlier match
        if full_match not in self.processed_text:
            return number_range, original_text

        entity_value_min, entity_value_max, entity_unit = None, None, None

        if min_part_match and min_part_match in self.number_detected_map:
            entity_dict = self.number_detected_map[min_part_match].entity_value
            entity_value_min = entity_dict[numeral_constant.NUMBER_DETECTION_RETURN_DICT_VALUE]
            entity_unit = entity_dict[numeral_constant.NUMBER_DETECTION_RETURN_DICT_UNIT]

        if max_part_match and max_part_match in self.number_detected_map:
            entity_dict = self.number_detected_map[max_part_match].entity_value
            entity_value_max = entity_dict[numeral_constant.NUMBER_DETECTION_RETURN_DICT_VALUE]
            entity_unit = entity_dict[numeral_constant.NUMBER_DETECTION_RETURN_DICT_UNIT]

        # When restricted to a unit type, drop ranges without a unit or with another unit type
        if self.unit_type and (
                entity_unit is None or self.number_detector.get_unit_type(entity_unit) != self.unit_type):
            return number_range, original_text

        original_text = self._get_original_text_from_tagged_text(full_match)
        if (entity_value_min or entity_value_max) and original_text:
            self.processed_text = self.processed_text.replace(full_match.strip(), '', 1)
            original_text = original_text.strip()
            number_range = {
                numeral_constant.NUMBER_RANGE_MIN_VALUE: entity_value_min,
                numeral_constant.NUMBER_RANGE_MAX_VALUE: entity_value_max,
                numeral_constant.NUMBER_RANGE_VALUE_UNIT: entity_unit
            }
        return number_range, original_text

    @staticmethod
    def _compile_range_pattern(template, variants):
        """Compile a range regex template for the given (already escaped) keyword variants."""
        return re.compile(
            template.format(number=numeral_constant.NUMBER_REPLACE_TEXT, choices='|'.join(variants)),
            re.UNICODE)

    def _collect_number_ranges(self, pattern, min_group, max_group, number_range_list, original_list):
        """
        Append to the given lists every range found by pattern in processed_text.

        min_group / max_group are the indexes (within a findall match tuple) of the tagged min
        and max number, or None when the pattern has no such part.
        """
        for match in pattern.findall(self.processed_text):
            number_range, original_text = self._get_number_range(
                min_part_match=match[min_group] if min_group is not None else None,
                max_part_match=match[max_group] if max_group is not None else None,
                full_match=match[0])
            if number_range and original_text:
                number_range_list.append(number_range)
                original_list.append(original_text)

    def _detect_min_num_range_with_prefix_variants(self, number_range_list=None, original_list=None):
        """
        Method to detect number range containing only min value and keywords which identify value as min
        present before them. Example - More than 2 {'more than' => keyword, '2' => min value},
                                       At least seven hundred rupees {'At least' => keyword,
                                       'seven hundred rupees'=>min value}
        Args:
            number_range_list (list): number ranges detected so far
            original_list (list): original texts detected so far
        Returns:
            (tuple): a tuple containing
                (list): list containing detected number ranges
                (list): list containing original text of each detected range
        """
        number_range_list = number_range_list or []
        original_list = original_list or []
        if self.min_range_prefix_variants:
            pattern = self._compile_range_pattern(self._PREFIX_RANGE_TEMPLATE,
                                                  self.min_range_prefix_variants)
            self._collect_number_ranges(pattern, 1, None, number_range_list, original_list)
        return number_range_list, original_list

    def _detect_min_num_range_with_suffix_variants(self, number_range_list=None, original_list=None):
        """
        Method to detect number range containing only min value and keywords which identify value as min
        present after them.

        Args:
            number_range_list (list): number ranges detected so far
            original_list (list): original texts detected so far
        Returns:
            (tuple): a tuple containing
                (list): list containing detected number ranges
                (list): list containing original text of each detected range
        """
        number_range_list = number_range_list or []
        original_list = original_list or []
        if self.min_range_suffix_variants:
            pattern = self._compile_range_pattern(self._SUFFIX_RANGE_TEMPLATE,
                                                  self.min_range_suffix_variants)
            self._collect_number_ranges(pattern, 1, None, number_range_list, original_list)
        return number_range_list, original_list

    def _detect_max_num_range_with_prefix_variants(self, number_range_list=None, original_list=None):
        """
        Method to detect number range containing only max value and keywords which identify value as max
        present before them. Example - less than 2 {'less than' => keyword, '2' => max value},
                                       At most seven hundred rupees {'At most' => keyword,
                                       'seven hundred rupees'=>max value}
        Args:
            number_range_list (list): number ranges detected so far
            original_list (list): original texts detected so far
        Returns:
            (tuple): a tuple containing
                (list): list containing detected number ranges
                (list): list containing original text of each detected range
        """
        number_range_list = number_range_list or []
        original_list = original_list or []
        if self.max_range_prefix_variants:
            pattern = self._compile_range_pattern(self._PREFIX_RANGE_TEMPLATE,
                                                  self.max_range_prefix_variants)
            self._collect_number_ranges(pattern, None, 1, number_range_list, original_list)
        return number_range_list, original_list

    def _detect_max_num_range_with_suffix_variants(self, number_range_list=None, original_list=None):
        """
        Method to detect number range containing only max value and keywords which identify value as max
        present after them.

        Args:
            number_range_list (list): number ranges detected so far
            original_list (list): original texts detected so far
        Returns:
            (tuple): a tuple containing
                (list): list containing detected number ranges
                (list): list containing original text of each detected range
        """
        number_range_list = number_range_list or []
        original_list = original_list or []
        if self.max_range_suffix_variants:
            pattern = self._compile_range_pattern(self._SUFFIX_RANGE_TEMPLATE,
                                                  self.max_range_suffix_variants)
            self._collect_number_ranges(pattern, None, 1, number_range_list, original_list)
        return number_range_list, original_list

    def _detect_min_max_num_range(self, number_range_list=None, original_list=None):
        """
        Method to detect number range containing both min and max value and keywords present in between
        them. Example - 2000 to 30000 {'to' => keyword, '2000' => min value, '30000' => max value},
                        2k-3k hundred rupees {'-' => keyword, '2k' => min value, '3k' => max value}
        Args:
            number_range_list (list): number ranges detected so far
            original_list (list): original texts detected so far
        Returns:
            (tuple): a tuple containing
                (list): list containing detected number ranges
                (list): list containing original text of each detected range
        """
        number_range_list = number_range_list or []
        original_list = original_list or []
        if self.min_max_range_variants:
            pattern = self._compile_range_pattern(self._MIN_MAX_RANGE_TEMPLATE,
                                                  self.min_max_range_variants)
            self._collect_number_ranges(pattern, 1, 2, number_range_list, original_list)
        return number_range_list, original_list

    def _update_tagged_text(self, original_number_list):
        """
        Replace every detected number range text in tagged_text with the tag generated from the
        entity_name used to initialize the object.

        Args:
            original_number_list (list): list of substrings of original text to be replaced with tag
                                         created from entity_name
        """
        for detected_text in original_number_list:
            self.tagged_text = self.tagged_text.replace(detected_text, self.tag)
def get_currency(text, detected_lang):
    """
    Run number detection restricted to the 'currency' unit type on the given text.

    Args:
        text: text to run the detection on
        detected_lang: language code passed to NumberDetector

    Returns:
        Whatever NumberDetector.detect_entity returns for the text
        (presumably a pair of detected number dicts and original texts - confirm against NumberDetector).
    """
    from ner_v2.detectors.numeral.number.number_detection import NumberDetector

    currency_detector = NumberDetector(entity_name='number',
                                       language=detected_lang,
                                       unit_type='currency')
    return currency_detector.detect_entity(text)