def _digit_number_parse(self, ext_result: ExtractResult) -> ParseResult:
    """Parse a digit-based extraction into a ParseResult.

    The result copies ``start``, ``length``, ``text`` and ``type`` from
    ``ext_result``. Every token matched by ``config.digital_number_regex``
    in the lower-cased text multiplies a running ``power`` by its entry in
    ``config.round_number_map`` and is then removed from the text. The
    remaining text and the accumulated power are passed to
    ``self._get_digital_value`` to compute ``result.value``.

    Shapes of input this is written for (from the original notes):
        [1] 24
        [2] 12 32/33
        [3] 1,000,000
        [4] 234.567
        [5] 44/55
        [6] 2 hundred
    """
    result = ParseResult()
    result.start = ext_result.start
    result.length = ext_result.length
    result.text = ext_result.text
    result.type = ext_result.type
    result.meta_data = MetaData() if not result.meta_data else result.meta_data

    power = 1
    start_index = 0
    handle = ext_result.text.lower()

    # The iterator scans the original string object; rebinding ``handle``
    # inside the loop does not disturb it.
    for match in regex.finditer(self.config.digital_number_regex, handle):
        token = match.group()

        # NOTE(review): if round_number_map is a plain dict, an unmapped
        # token makes .get() return None and the multiplication below raise
        # TypeError - confirm the regex and the map stay in sync.
        rep = self.config.round_number_map.get(token)
        power *= rep

        # Strip every occurrence of the token, together with the whitespace
        # that precedes it, from the text that is still to be evaluated.
        tmp_index = handle.find(token, start_index)
        while tmp_index >= 0:
            front = handle[0:tmp_index].rstrip()
            start_index = len(front)
            # Cut exactly the token's characters: the length of the matched
            # text, not the length of the match object.
            handle = front + handle[tmp_index + len(token):]
            tmp_index = handle.find(token, start_index)

    # ``power`` is the scale applied when computing the numeric value.
    result.value = self._get_digital_value(handle, power)

    return result
def dou_parse(self, source: ExtractResult) -> ParseResult:
    """Parse an extraction containing a decimal point or a trailing round unit.

    When ``source.text`` matches ``config.double_and_round_regex``, the last
    character of the unit-replaced text is looked up in
    ``config.round_number_map_char`` and used as the multiplier for the
    digits before it. Otherwise the unit-replaced text is split on
    ``config.point_regex`` and the integer and fractional parts are
    evaluated separately and combined.

    Returns a ParseResult built from ``source`` with ``value`` and
    ``resolution_str`` filled in.
    """
    parsed = ParseResult(source)
    normalized = self.replace_unit(source.text)

    # The regex is tested against the raw text; the values are read from the
    # unit-replaced text.
    has_round_suffix = regex.search(
        self.config.double_and_round_regex, source.text) is not None

    if has_round_suffix:
        scale = self.config.round_number_map_char[normalized[-1:]]
        parsed.value = self.get_digit_value(normalized[:-1], scale)
    else:
        parts = regex.split(self.config.point_regex, normalized)
        # Nothing before the point: treat the integer part as zero.
        if parts[0] == '':
            parts[0] = '零'

        is_negative = regex.search(
            self.config.negative_number_sign_regex, parts[0]) is not None
        integer_part = self.get_int_value(parts[0])
        fraction_part = self.get_point_value(parts[1])

        # For a negative integer part the fraction must move the value
        # further from zero, so it is subtracted rather than added.
        parsed.value = (
            integer_part - fraction_part
            if is_negative
            else integer_part + fraction_part
        )

    parsed.resolution_str = self.__format(parsed.value)
    return parsed
def per_parse_chs(self, source: ExtractResult) -> ParseResult:
    """Parse a percentage extraction and resolve it to a value plus '%'.

    The key present in ``source.data`` selects the strategy:

    * ``'Spe'``: special expressions. The literals '半折' and '10成'
      resolve to 50 and 100. Anything else is read from the matches of
      ``config.spe_get_number_regex`` as a whole digit plus an optional
      tenths digit, scaled by 10.
    * ``'Num'``: the text matched by ``config.percentage_regex`` is passed
      to ``get_digit_value_chs`` with a power chosen from a k/M/G/T letter
      found in it (1 when none is present).
    * otherwise: the matched text is unit-replaced, split on
      ``config.point_regex_chs`` and evaluated as integer part plus or
      minus fractional part.

    Returns a ParseResult built from ``source`` with ``value`` and
    ``resolution_str`` filled in.
    """

    def leading_number(token_match):
        # Value of the first character of a matched token.
        lead = token_match.group()[0]
        if lead == '对':
            return 5
        if lead in ('十', '拾'):
            return 10
        return self.config.zero_to_nine_map_chs[lead]

    parsed = ParseResult(source)
    text = source.text

    if 'Spe' in source.data:
        text = self.replace_unit(self.replace_full_with_half(text))

        if text == '半折':
            parsed.value = 50
        elif text == '10成':
            parsed.value = 100
        else:
            found = list(regex.finditer(self.config.spe_get_number_regex, text))
            whole = leading_number(found[0])

            if len(found) == 2:
                # A second match carries the tenths digit ('半' means one half).
                tenth_char = found[1].group()[0]
                if tenth_char == '半':
                    fraction = 0.5
                else:
                    fraction = self.config.zero_to_nine_map_chs[tenth_char] * 0.1
                parsed.value = (whole + fraction) * 10
            else:
                parsed.value = whole * 10

    elif 'Num' in source.data:
        number_text = regex.search(self.config.percentage_regex, text).group()

        # First suffix group with a letter present in the text wins.
        # NOTE(review): every group repeats its entries; presumably the
        # second of each pair was meant to be a different form of the same
        # letter (e.g. full-width) - confirm against the upstream file.
        suffix_powers = (
            (['k', 'K', 'k', 'K'], 1000),
            (['M', 'M'], 1000000),
            (['G', 'G'], 1000000000),
            (['T', 'T'], 1000000000000),
        )
        scale = 1
        for letters, factor in suffix_powers:
            if any(letter in number_text for letter in letters):
                scale = factor
                break

        parsed.value = self.get_digit_value_chs(number_text, scale)

    else:
        matched = regex.search(self.config.percentage_regex, text)
        number_text = self.replace_unit(matched.group())
        parts = regex.split(self.config.point_regex_chs, number_text)
        # Nothing before the point: treat the integer part as zero.
        if parts[0] == '':
            parts[0] = '零'

        total = self.get_int_value_chs(parts[0])
        if len(parts) == 2:
            is_negative = regex.search(
                self.config.negative_number_sign_regex, parts[0]) is not None
            fraction = self.get_point_value_chs(parts[1])
            # For a negative integer part the fraction is subtracted so the
            # value moves further from zero.
            total = total - fraction if is_negative else total + fraction

        parsed.value = total

    parsed.resolution_str = self.__format(parsed.value) + '%'
    return parsed