Esempio n. 1
0
    def get_year_from_text(self, match: Match) -> int:
        """Work out the year expressed by a regex match.

        The ``'year'`` group is tried first and converted with ``int``.
        Values in ``[Constants.MIN_TWO_DIGIT_YEAR_PAST_NUM, 100)`` get 1900
        added; values in ``[0, Constants.MAX_TWO_DIGIT_YEAR_FUTURE_NUM)`` get
        2000 added; anything else is returned as parsed.

        When the ``'year'`` group is empty, the year is assembled from the
        ``Constants.FIRST_TWO_YEAR_NUM`` and ``Constants.LAST_TWO_YEAR_NUM``
        groups, each converted by ``self.config.number_parser``.

        Args:
            match: The regex match whose named groups hold the year text.

        Returns:
            The resolved year, or ``Constants.INVALID_YEAR`` when no usable
            group is present or the two-part combination is rejected.

        Raises:
            ValueError: If the ``'year'`` group text is not accepted by
                ``int``.
        """
        year_str = RegExpUtility.get_group(match, 'year')
        if year_str and not year_str.isspace():
            year = int(year_str)
            # Expand a two-digit year to a full year.
            if 100 > year >= Constants.MIN_TWO_DIGIT_YEAR_PAST_NUM:
                year += 1900
            elif 0 <= year < Constants.MAX_TWO_DIGIT_YEAR_FUTURE_NUM:
                year += 2000
            return year

        first_two_str = RegExpUtility.get_group(
            match, Constants.FIRST_TWO_YEAR_NUM)
        if not first_two_str or first_two_str.isspace():
            return Constants.INVALID_YEAR

        def parse_group(group_name: str, group_text: str):
            # Wrap the captured text for the number parser and return its
            # value, falling back to 0 when the parser yields a falsy value.
            er = ExtractResult()
            er.text = group_text
            # Use the group's own offset: searching match.string for the text
            # could land on an earlier, unrelated occurrence of the same text.
            er.start = match.start(group_name)
            er.length = len(group_text)
            # Parse once and reuse the value instead of parsing twice.
            value = self.config.number_parser.parse(er).value
            return value if value else 0

        first_two_year_num = parse_group(
            Constants.FIRST_TWO_YEAR_NUM, first_two_str)

        last_two_str = RegExpUtility.get_group(
            match, Constants.LAST_TWO_YEAR_NUM)
        # Test for a missing/empty group before calling isspace(), so a None
        # or empty capture is treated as "no last part" rather than raising
        # or being sent to the parser.
        if last_two_str and not last_two_str.isspace():
            last_two_year_num = parse_group(
                Constants.LAST_TWO_YEAR_NUM, last_two_str)
        else:
            last_two_year_num = 0

        if first_two_year_num >= 100:
            # The first part already carries its full magnitude.
            return first_two_year_num + last_two_year_num

        # first_two_year_num < 100 from here on.
        last_is_single_word = len(
            (last_two_str or '').strip().split(' ')) == 1
        # Reject when there is no last part, or when the first part is a
        # multiple of ten followed by a single word.
        # NOTE(review): presumably this filters plain numbers such as
        # "twenty one" from being read as a year - confirm against the
        # patterns that feed this method.
        if last_two_year_num == 0 or (
                first_two_year_num % 10 == 0 and last_is_single_word):
            return Constants.INVALID_YEAR

        # The first part is the century portion: scale it and add the rest.
        return (first_two_year_num * 100) + last_two_year_num
Esempio n. 2
0
    def try_merge_modifier_token(self, extract_result: ExtractResult, token_regex: Pattern, text: str):
        """Extend ``extract_result`` backwards over a preceding modifier token.

        The part of ``text`` before the result's start, with trailing
        whitespace ignored, is checked via ``self.has_token_index`` against
        ``token_regex``. On a hit, ``extract_result`` is widened in place so
        that it begins at the reported token index: ``start`` moves back,
        ``length`` grows by the same amount, and ``text`` is re-sliced from
        the full input.

        Args:
            extract_result: The extraction to widen; mutated on success.
            token_regex: Pattern passed to ``self.has_token_index``.
            text: The full input that ``extract_result`` was taken from.

        Returns:
            True if a token was found and merged, otherwise False.
        """
        start = extract_result.start if extract_result.start else 0
        before_str = text[0:start]

        # Search once; the result exposes ``matched`` and also unpacks into
        # ``(matched, index)``.
        token = self.has_token_index(before_str.rstrip(), token_regex)
        if not token.matched:
            return False

        _, token_index = token

        # Measured against the unstripped prefix so that whitespace between
        # the token and the original extraction is included in the merge.
        mod_length = len(before_str) - token_index

        # Work from normalised values so an unset start/length is treated
        # as 0 instead of failing on in-place arithmetic.
        length = extract_result.length if extract_result.length else 0
        new_start = start - mod_length
        new_length = length + mod_length

        extract_result.start = new_start
        extract_result.length = new_length
        extract_result.text = text[new_start: new_start + new_length]
        return True