コード例 #1
0
    def _digit_number_parse(self, ext_result: ExtractResult) -> ParseResult:
        """Parse a digit-based number extraction into a ParseResult.

        Copies ``start``, ``length``, ``text`` and ``type`` from
        ``ext_result``. Every substring of the lower-cased text matched by
        ``self.config.digital_number_regex`` is looked up in
        ``self.config.round_number_map``, multiplied into a running power
        and removed from the text. The remaining text and the accumulated
        power are then handed to ``self._get_digital_value`` to produce
        ``result.value``.

        Input shapes listed by the original author:
            [1] 24
            [2] 12 32/33
            [3] 1,000,000
            [4] 234.567
            [5] 44/55
            [6] 2 hundred

        Args:
            ext_result: The extraction to parse.

        Returns:
            A ParseResult carrying the copied fields and the computed value.
        """
        result = ParseResult()
        result.start = ext_result.start
        result.length = ext_result.length
        result.text = ext_result.text
        result.type = ext_result.type
        if not result.meta_data:
            result.meta_data = MetaData()

        power = 1
        start_index = 0
        handle = ext_result.text.lower()

        # The iterator scans the string object bound to ``handle`` right now;
        # rebinding ``handle`` inside the loop does not disturb it.
        for match in regex.finditer(self.config.digital_number_regex, handle):
            word = match.group()
            power *= self.config.round_number_map.get(word)

            # Cut every occurrence of the matched word out of the text,
            # together with the whitespace that precedes it.
            tmp_index = handle.find(word, start_index)
            while tmp_index >= 0:
                front = handle[:tmp_index].rstrip()
                start_index = len(front)
                # Skip exactly the matched text. Taking the length of the
                # match object itself does not give the text length.
                handle = front + handle[tmp_index + len(word):]
                tmp_index = handle.find(word, start_index)

        # The power is the scale applied when computing the final value.
        result.value = self._get_digital_value(handle, power)

        return result
コード例 #2
0
    def dou_parse(self, source: ExtractResult) -> ParseResult:
        """Parse a decimal ("double") extraction into a ParseResult.

        When ``source.text`` matches ``self.config.double_and_round_regex``,
        the last character of the unit-replaced text is looked up in
        ``self.config.round_number_map_char`` and passed as the power to
        ``self.get_digit_value`` along with the rest of the text.

        Otherwise the unit-replaced text is split on
        ``self.config.point_regex``; the integer part and the point part
        are evaluated separately and combined, with the point part
        subtracted when the integer part matches
        ``self.config.negative_number_sign_regex``.

        Args:
            source: The extraction to parse.

        Returns:
            A ParseResult with ``value`` and ``resolution_str`` populated.
        """
        result = ParseResult(source)
        normalized = self.replace_unit(source.text)

        if regex.search(self.config.double_and_round_regex, source.text) is None:
            parts = regex.split(self.config.point_regex, normalized)
            if parts[0] == '':
                # Nothing precedes the point: substitute the zero character.
                parts[0] = '零'

            is_negative = regex.search(
                self.config.negative_number_sign_regex, parts[0]) is not None
            whole = self.get_int_value(parts[0])
            fraction = self.get_point_value(parts[1])
            # For a negative number the fractional part moves the value
            # further from zero, so it is subtracted.
            result.value = whole - fraction if is_negative else whole + fraction
        else:
            scale = self.config.round_number_map_char[normalized[-1:]]
            result.value = self.get_digit_value(normalized[:-1], scale)

        result.resolution_str = self.__format(result.value)
        return result
コード例 #3
0
ファイル: parsers.py プロジェクト: zzxcv1314/Recognizers-Text
    def per_parse_chs(self, source: ExtractResult) -> ParseResult:
        """Parse a Chinese percentage extraction into a ParseResult.

        The branch is selected by the marker present in ``source.data``:

        * ``'Spe'``: the text is passed through ``replace_full_with_half``
          and ``replace_unit``. '半折' yields 50 and '10成' yields 100.
          Otherwise the matches of ``self.config.spe_get_number_regex``
          supply a leading number and, when there are exactly two matches,
          a tenths part; their sum is multiplied by 10.
        * ``'Num'``: the text matched by ``self.config.percentage_regex`` is
          passed to ``self.get_digit_value_chs`` with a power chosen from a
          k / M / G / T suffix (1 when none is present).
        * otherwise: the text matched by ``self.config.percentage_regex`` is
          unit-replaced and split on ``self.config.point_regex_chs``; the
          integer part and the optional point part are combined, with the
          point part subtracted when the integer part is negative.

        Args:
            source: The extraction to parse.

        Returns:
            A ParseResult whose ``resolution_str`` is the formatted value
            followed by '%'.
        """
        result = ParseResult(source)
        source_text = source.text

        if 'Spe' in source.data:
            source_text = self.replace_full_with_half(source_text)
            source_text = self.replace_unit(source_text)

            if source_text == '半折':
                result.value = 50
            elif source_text == '10成':
                result.value = 100
            else:
                matches = list(regex.finditer(
                    self.config.spe_get_number_regex, source_text))
                int_number = self._spe_leading_number(matches[0].group()[0])

                if len(matches) == 2:
                    point_char = matches[1].group()[0]
                    if point_char == '半':
                        point_number = 0.5
                    else:
                        point_number = (
                            self.config.zero_to_nine_map_chs[point_char] * 0.1)
                    result.value = (int_number + point_number) * 10
                else:
                    result.value = int_number * 10

        elif 'Num' in source.data:
            double_match = regex.search(self.config.percentage_regex, source_text)
            double_text = double_match.group()

            # Suffix letters in ASCII and fullwidth form, checked in order.
            # NOTE(review): the previous lists repeated each ASCII letter;
            # the repeats are taken to be fullwidth letters that were lost to
            # normalization (this branch does not call
            # replace_full_with_half) - confirm against the upstream file.
            suffix_powers = (
                (('k', 'K', '\uff4b', '\uff2b'), 1000),
                (('M', '\uff2d'), 1000000),
                (('G', '\uff27'), 1000000000),
                (('T', '\uff34'), 1000000000000),
            )
            power = 1
            for suffixes, scale in suffix_powers:
                if any(suffix in double_text for suffix in suffixes):
                    power = scale
                    break
            result.value = self.get_digit_value_chs(double_text, power)

        else:
            double_match = regex.search(self.config.percentage_regex, source_text)
            double_text = self.replace_unit(double_match.group())

            split_result = regex.split(self.config.point_regex_chs, double_text)
            if split_result[0] == '':
                # Nothing precedes the point: substitute the zero character.
                split_result[0] = '零'

            double_value = self.get_int_value_chs(split_result[0])
            if len(split_result) == 2:
                is_negative = regex.search(
                    self.config.negative_number_sign_regex,
                    split_result[0]) is not None
                point_value = self.get_point_value_chs(split_result[1])
                if is_negative:
                    double_value -= point_value
                else:
                    double_value += point_value
            result.value = double_value

        result.resolution_str = self.__format(result.value) + '%'
        return result

    def _spe_leading_number(self, number_char: str) -> int:
        """Return the number a leading character stands for in a 'Spe' form.

        '对' maps to 5, '十' and '拾' map to 10, and any other character is
        looked up in ``self.config.zero_to_nine_map_chs``.
        """
        if number_char == '对':
            return 5
        if number_char in ('十', '拾'):
            return 10
        return self.config.zero_to_nine_map_chs[number_char]