コード例 #1
0
ファイル: parsers.py プロジェクト: zhamppx97/Recognizers-Text
 def parse(self, ext_result: ExtractResult):
     """Build a ParseResult from ``ext_result`` with a zero-stripped resolution.

     The span fields (start, length, text, type) and ``data`` are copied
     from the extraction; ``resolution_str`` is the extracted text passed
     through ``self.drop_leading_zeros``.
     """
     parsed = ParseResult(ext_result)

     # Mirror the span description of the extraction, field by field.
     for field_name in ('start', 'length', 'text', 'type'):
         setattr(parsed, field_name, getattr(ext_result, field_name))

     parsed.resolution_str = self.drop_leading_zeros(ext_result.text)
     parsed.data = ext_result.data
     return parsed
コード例 #2
0
ファイル: parsers.py プロジェクト: mrussek/Recognizers-Text
 def parse(self, source: ExtractResult):
     """Build a ParseResult from ``source`` and attach its GUID score.

     ``resolution_str`` and ``text`` both carry the extracted text, the
     span fields are copied from ``source``, and ``value`` is whatever
     ``self.score_guid`` returns for that text.
     """
     parsed = ParseResult(source)

     # (target attribute on the result, attribute read from the source)
     copied_fields = (
         ('resolution_str', 'text'),
         ('start', 'start'),
         ('length', 'length'),
         ('text', 'text'),
         ('type', 'type'),
     )
     for target_name, source_name in copied_fields:
         setattr(parsed, target_name, getattr(source, source_name))

     parsed.value = self.score_guid(source.text)
     return parsed
コード例 #3
0
    def _digit_number_parse(self, ext_result: ExtractResult) -> ParseResult:
        """Parse an extraction written with digits into a ParseResult.

        Every token matched by ``self.config.digital_number_regex`` is
        looked up in ``self.config.round_number_map``; the looked-up factors
        are multiplied into ``power`` and the tokens are removed from the
        text. The remaining text and ``power`` are then handed to
        ``self._get_digital_value`` to produce ``result.value``.

        Args:
            ext_result: Extraction to parse; its ``text`` is lower-cased
                before matching.

        Returns:
            A ParseResult carrying the span fields of ``ext_result``, a
            ``meta_data`` object and the computed ``value``.
        """
        result = ParseResult()
        result.start = ext_result.start
        result.length = ext_result.length
        result.text = ext_result.text
        result.type = ext_result.type
        result.meta_data = MetaData(
        ) if not result.meta_data else result.meta_data

        # Input shapes listed by the original author:
        # [1] 24
        # [2] 12 32/33
        # [3] 1,000,000
        # [4] 234.567
        # [5] 44/55
        # [6] 2 hundred
        # dot occurred.

        power = 1
        start_index = 0
        handle = ext_result.text.lower()

        # The matches are collected up front because ``handle`` is rewritten
        # while they are being processed.
        matches = list(regex.finditer(self.config.digital_number_regex,
                                      handle))
        for match in matches:
            token = match.group()
            # NOTE: a token missing from round_number_map yields None here
            # and the multiplication raises TypeError (unchanged behavior).
            power *= self.config.round_number_map.get(token)

            # Cut every occurrence of the token out of the text, together
            # with the whitespace in front of it. The cut must span the
            # matched text, so use the token's length: len() of the match
            # object itself is not the number of matched characters.
            token_length = len(token)
            tmp_index = handle.find(token, start_index)
            while tmp_index >= 0:
                front = handle[0:tmp_index].rstrip()
                start_index = len(front)
                handle = front + handle[tmp_index + token_length:]
                tmp_index = handle.find(token, start_index)

        # Scale used in the calculate of double
        result.value = self._get_digital_value(handle, power)

        return result
コード例 #4
0
ファイル: parsers.py プロジェクト: mamahara/poc-qnamaker-bot
    def __merge_compound_unit(self,
                              compound_result: ExtractResult) -> ParseResult:
        """Merge the parts of a compound currency extraction into results.

        ``compound_result.data`` is walked item by item. A group starts at
        an item of type ``Constants.SYS_UNIT_CURRENCY`` (the main unit);
        following items are folded into that group when they are plain
        numbers (added as hundredths) or a fraction unit that belongs to the
        main unit's currency. Anything else closes the current group and is
        re-examined as the possible start of a new one.

        Args:
            compound_result: Extraction whose ``data`` holds the ordered
                sub-extractions of the compound expression.

        Returns:
            A ParseResult built from ``compound_result`` whose ``value`` is
            the list of merged per-group results.
        """
        results = []
        compound_unit = compound_result.data

        count = 0
        result = None
        number_value = ''
        main_unit_value = ''
        main_unit_iso_code = ''
        fraction_units_string = ''

        idx = 0

        while idx < len(compound_unit):
            extract_result = compound_unit[idx]
            parse_result = self.number_with_unit_parser.parse(extract_result)
            parse_result_value = parse_result.value
            try:
                unit_value = parse_result_value.unit if parse_result_value else None
            except AttributeError:
                # Plain numbers parse to a value without a ``unit``.
                unit_value = None
            # Process a new group
            if count == 0:
                if not extract_result.type == Constants.SYS_UNIT_CURRENCY:
                    idx = idx + 1
                    continue

                # Initialize a new result
                result = ParseResult()
                result.start = extract_result.start
                result.length = extract_result.length
                result.text = extract_result.text
                result.type = extract_result.type

                main_unit_value = unit_value
                if parse_result_value and parse_result_value.number:
                    number_value = float(parse_result_value.number)
                result.resolution_str = parse_result.resolution_str

                main_unit_iso_code = self.config.currency_name_to_iso_code_map.get(
                    unit_value, None)
                # If the main unit can't be recognized, finish process this group.
                if not main_unit_iso_code:
                    result.value = UnitValue(
                        self.__get_number_value(number_value), main_unit_value)
                    results.append(result)
                    result = None
                    idx = idx + 1
                    continue

                fraction_units_string = self.config.currency_fraction_mapping.get(
                    main_unit_iso_code)
            else:
                if extract_result.type == Constants.SYS_NUM:
                    # A bare number after the main unit counts as hundredths.
                    number_value = number_value + \
                        float(parse_result.value) * (1 / 100)
                    result.resolution_str = result.resolution_str + ' ' + str(
                        parse_result.resolution_str or '')
                    result.length = parse_result.start + parse_result.length - result.start
                    count = count + 1
                    idx = idx + 1
                    continue

                fraction_unit_code = self.config.currency_fraction_code_list.get(
                    unit_value, None)
                fraction_num_value = self.config.currency_fraction_num_map.get(
                    unit_value, None) if parse_result_value else None

                # ``fraction_num_value`` is None when the unit is unknown and
                # must not be 0 either: both would break the division below,
                # so a plain truthiness test replaces the ``!= 0`` check.
                if (fraction_unit_code and fraction_num_value
                        and self.__check_units_string_contains(
                            fraction_unit_code, fraction_units_string)):
                    number_value = number_value + float(
                        parse_result_value.number) * (1 / fraction_num_value)
                    # Guard against a missing resolution string, as the
                    # plain-number branch above does.
                    result.resolution_str = result.resolution_str + ' ' + str(
                        parse_result.resolution_str or '')
                    result.length = parse_result.start + parse_result.length - result.start
                else:
                    # The item does not belong to the current group: close
                    # the group and revisit the same item (idx unchanged)
                    # as a potential start of a new group.
                    if result:
                        result = self.__create_currency_result(
                            result, main_unit_iso_code, number_value,
                            main_unit_value)
                        results.append(result)
                        result = None

                    count = 0
                    number_value = ''
                    continue

            count = count + 1
            idx = idx + 1

        # Flush the group that was still open when the input ended.
        if result:
            result = self.__create_currency_result(result, main_unit_iso_code,
                                                   number_value,
                                                   main_unit_value)
            results.append(result)

        self.__resolve_text(results, compound_result.text,
                            compound_result.start)

        ret = ParseResult(compound_result)

        ret.value = results
        return ret
コード例 #5
0
    def _frac_like_number_parse(self,
                                ext_result: ExtractResult) -> ParseResult:
        """Parse a fraction-like extraction into a ParseResult.

        Two forms are handled:

        * The text contains ``self.config.fraction_marker_token``: the part
          before the marker is divided by the part after it.
        * Otherwise the text is split into words, the trailing words that
          form the denominator are separated from the rest, and the rest is
          split into an integer part and a numerator at the last written
          fraction separator. The value is stored as a float.

        Args:
            ext_result: Extraction to parse; its ``text`` is lower-cased
                before processing.

        Returns:
            A ParseResult carrying the span fields of ``ext_result`` and the
            computed ``value``.
        """
        result = ParseResult()
        result.start = ext_result.start
        result.length = ext_result.length
        result.text = ext_result.text
        result.type = ext_result.type

        result_text = ext_result.text.lower()
        # NOTE(review): the marker is used as a regex pattern here but as a
        # literal string in find() below; this presumably assumes the token
        # contains no regex metacharacters -- confirm against the configs.
        if regex.search(self.config.fraction_marker_token, result_text):
            over_index = result_text.find(self.config.fraction_marker_token)
            # Text before the marker is the dividend, text after it the divisor.
            small_part = result_text[0:over_index].strip()
            big_part = result_text[over_index +
                                   len(self.config.fraction_marker_token
                                       ):len(result_text)].strip()
            # Each side is read as digits when it starts with a digit,
            # otherwise as number words.
            small_value = self._get_digital_value(
                small_part, 1) if self._is_digit(
                    small_part[0]) else self.__get_int_value(
                        self.__get_matches(small_part))
            big_value = self._get_digital_value(big_part, 1) if self._is_digit(
                big_part[0]) else self.__get_int_value(
                    self.__get_matches(big_part))

            result.value = small_value / big_value
        else:
            # Drop the empty strings produced by consecutive spaces.
            words = list(filter(lambda x: x, result_text.split(' ')))
            frac_words = self.config.normalize_token_set(words, result)

            # Split fraction with integer
            # Walk backwards from the last word; after the loop,
            # frac_words[split_index:] is treated as the denominator.
            split_index = len(frac_words) - 1
            current_value = self.config.resolve_composite_number(
                frac_words[split_index])
            round_value = 1

            # NOTE: split_index is the loop variable AND is adjusted by hand
            # right before each break; the value it holds when the loop is
            # left is what the code below relies on.
            for split_index in range(len(frac_words) - 2, -1, -1):
                # Separator words carry no numeric value; skip them.
                if (frac_words[split_index]
                        in self.config.written_fraction_separator_texts
                        or frac_words[split_index]
                        in self.config.written_integer_separator_texts):
                    continue
                previous_value = current_value
                current_value = self.config.resolve_composite_number(
                    frac_words[split_index])

                sm_hundreds = 100

                # previous: hundred
                # current: one
                if ((previous_value >= sm_hundreds
                     and previous_value > current_value) or
                    (previous_value < sm_hundreds
                     and self.__is_composable(current_value, previous_value))):
                    if (previous_value < sm_hundreds
                            and current_value >= round_value):
                        round_value = current_value
                    elif (previous_value < sm_hundreds
                          and current_value < round_value):
                        split_index += 1
                        break

                    # current is the first word
                    if split_index == 0:
                        # scan, skip the first word
                        split_index = 1
                        while split_index <= len(frac_words) - 2:
                            # e.g. one hundred thousand
                            # frac[i+1] % 100 and frac[i] % 100 = 0
                            if (self.config.resolve_composite_number(
                                    frac_words[split_index]) >= sm_hundreds
                                    and not frac_words[split_index + 1] in self
                                    .config.written_fraction_separator_texts
                                    and self.config.resolve_composite_number(
                                        frac_words[split_index + 1]) <
                                    sm_hundreds):
                                split_index += 1
                                break
                            split_index += 1
                        break
                    continue
                # The current word does not combine with the words after it:
                # the denominator starts at the following word.
                split_index += 1
                break

            # Collect the denominator words, splitting hyphenated words into
            # their two halves with the hyphen kept as its own token.
            frac_part = []
            for i in range(split_index, len(frac_words)):
                if frac_words[i].find('-') > -1:
                    split = frac_words[i].split('-')
                    frac_part.append(split[0])
                    frac_part.append('-')
                    frac_part.append(split[1])
                else:
                    frac_part.append(frac_words[i])

            # What remains is the integer part and/or the numerator.
            frac_words = frac_words[:split_index]

            # denomi = denominator
            denomi_value = self.__get_int_value(frac_part)
            # Split mixed number with fraction
            numer_value = 0
            int_value = 0

            # Search backwards for a fraction separator that is not the last
            # word; the words after it form the numerator.
            mixed_index = len(frac_words)
            for i in range(len(frac_words) - 1, -1, -1):
                if (i < len(frac_words) - 1 and frac_words[i]
                        in self.config.written_fraction_separator_texts):
                    numer_str = ' '.join(frac_words[i + 1:len(frac_words)])
                    numer_value = self.__get_int_value(
                        self.__get_matches(numer_str))
                    mixed_index = i + 1
                    break

            # Without a separator, mixed_index is len(frac_words) and all
            # remaining words are read as a single integer.
            int_str = ' '.join(frac_words[0:mixed_index])
            int_value = self.__get_int_value(self.__get_matches(int_str))

            # Find mixed number
            if (mixed_index != len(frac_words) and numer_value < denomi_value):
                # int_value + numer_value / denomi_value
                result.value = int_value + numer_value / denomi_value
            else:
                # (int_value + numer_value) / denomi_value
                result.value = (int_value + numer_value) / denomi_value

            # Convert to float for fixed float point vs. exponential notation consistency /w C#/TS/JS
            result.value = float(result.value)
        return result