Exemple #1
0
    def parse_basic_regex_match(self, source: str, reference: datetime):
        """Resolve a stand-alone time expression against ``reference``.

        The trimmed, lower-cased text is run through three strategies, in
        order, and the first one that applies wins:

        1. ``config.at_regex`` matches exactly the text, either directly or
           after ``config.time_token_prefix`` has been prepended to it.
        2. The text is a key of ``config.numbers`` whose value is an hour in
           the range 0..24 (24 is normalised to 0).
        3. One of ``config.time_regexes`` matches the text exactly.

        :param source: Raw text to parse.
        :param reference: Supplies the year, month and day of the result.
        :return: Whatever ``match_to_time`` returns for strategies 1 and 3, a
            populated ``DateTimeResolutionResult`` for strategy 2, and a
            freshly constructed ``DateTimeResolutionResult`` otherwise.
        """
        from .utilities import DateTimeResolutionResult, DateUtils

        text = source.strip().lower()

        # Strategy 1: the "at" pattern, retried with the time-token prefix
        # glued in front when the bare text does not match.
        prefix_length = 0
        at_match = regex.search(self.config.at_regex, text)
        if not at_match:
            at_match = regex.search(
                self.config.at_regex, self.config.time_token_prefix + text)
            prefix_length = len(self.config.time_token_prefix)

        # Accept only a match that begins right after the (possibly empty)
        # prefix and whose matched text is the whole input.
        covers_text = (
            at_match is not None
            and at_match.start() == prefix_length
            and at_match.group() == text
        )
        if covers_text:
            return self.match_to_time(at_match, reference)

        # Strategy 2: a plain number word naming the hour.
        hour = self.config.numbers.get(text, -1)
        if 0 <= hour <= 24:
            resolution = DateTimeResolutionResult()
            hour = 0 if hour == 24 else hour
            # Hours 1..12 are tagged with the AM/PM group name.
            if 0 < hour <= 12:
                resolution.comment = Constants.AM_PM_GROUP_NAME

            resolution.timex = f'T{hour:02d}'
            moment = DateUtils.safe_create_from_min_value(
                reference.year, reference.month, reference.day, hour, 0, 0)
            resolution.future_value = moment
            resolution.past_value = moment
            resolution.success = True
            return resolution

        # Strategy 3: any of the configured time patterns, matched exactly.
        for time_regex in self.config.time_regexes:
            exact = RegExpUtility.exact_match(time_regex, text, True)
            if exact and exact.success:
                return self.match_to_time(exact, reference)

        return DateTimeResolutionResult()
Exemple #2
0
    def merge_two_time_points(self, source: str, reference: datetime) -> List[Token]:
        """Merge neighbouring time points in ``source`` into period tokens.

        Time points come from ``config.single_time_extractor``. Integers from
        ``config.integer_extractor`` are added as extra time points when they
        do not overlap an existing time point and either

        * the last integer closes the text, or the text after it matches
          ``config.general_ending_regex``; or
        * the text between the integer and the next time point is an exact
          ``config.till_regex`` match or a connector token.

        Two adjacent time points are merged into one token when the text
        between them is

        * a full ``config.till_regex`` match ("{TimePoint} to {TimePoint}");
          the token is widened to a preceding "from"/"between" token and,
          when ``config.check_both_before_after`` is set, to a "between"
          token found in the text after the second time point; or
        * a connector token, and a "between" token is found before the first
          time point or (with ``config.check_both_before_after``) after the
          second one.

        :param source: Text to scan.
        :param reference: Reference date forwarded to the time extractor.
        :return: One ``Token(begin, end)`` per merged period, in the order the
            pairs were merged.
        """
        result: List[Token] = list()
        time_extract_results = self.config.single_time_extractor.extract(source, reference)
        num_extract_results = self.config.integer_extractor.extract(source)

        if num_extract_results:
            time_numbers: List[ExtractResult] = list()

            # The last number is a time point when nothing follows it, or
            # when only "ending" text follows it.
            num = num_extract_results[-1]
            num_stop = num.start + num.length
            if num_stop == len(source) or regex.search(
                    self.config.general_ending_regex, source[num_stop:]) is not None:
                time_numbers.append(num)

            # A number is also a time point when a till/connector string
            # links it to the next extracted time point. ``j`` only moves
            # forward across iterations.
            j = 0
            for number in num_extract_results:
                num_end = number.start + number.length

                # find subsequent time point
                while j < len(time_extract_results) and time_extract_results[j].start <= num_end:
                    j += 1
                if j >= len(time_extract_results):
                    break

                # check connector string
                middle = source[num_end:time_extract_results[j].start]
                if RegExpUtility.exact_match(self.config.till_regex, middle, True).success or\
                        self.config.is_connector_token(middle.strip()):
                    time_numbers.append(number)

            # Keep only the numbers that do not overlap a known time point.
            for time_num in time_numbers:
                overlap: bool = any(map(time_num.overlap, time_extract_results))
                if not overlap:
                    time_extract_results.append(time_num)

            time_extract_results = sorted(time_extract_results, key=lambda x: x.start)

        # merge "{TimePoint} to {TimePoint}", "between {TimePoint} and {TimePoint}"
        i = 0
        while i < len(time_extract_results) - 1:
            first = time_extract_results[i]
            second = time_extract_results[i + 1]
            middle: str = source[first.start + first.length:second.start].strip().lower()
            match = regex.search(self.config.till_regex, middle)

            # handle "{TimePoint} to {TimePoint}"
            if match is not None and match.start() == 0 and match.group() == middle:
                period_begin = first.start
                period_end = second.start + second.length

                before = source[0:period_begin].strip().lower()

                # handle "from"
                from_index: MatchedIndex = self.config.get_from_token_index(before)
                if from_index.matched:
                    period_begin = from_index.index

                # handle "between"
                between_index: MatchedIndex = self.config.get_between_token_index(before)
                if between_index.matched:
                    period_begin = between_index.index

                # handle "between" in the text after the period. Only done
                # for configurations that opt in, mirroring the connector
                # branch below.
                if self.config.check_both_before_after:
                    # Everything after the period: a slice takes an END
                    # index, so the open-ended form is required here.
                    tail = source[period_end:]
                    after = tail.strip().lower()
                    after_index: MatchedIndex = self.config.get_between_token_index(after)
                    if after_index.matched:
                        # The index is relative to the stripped tail; add the
                        # stripped leading whitespace and the tail's own
                        # offset to obtain a position in ``source``.
                        leading = len(tail) - len(tail.lstrip())
                        period_end += leading + after_index.index

                result.append(Token(period_begin, period_end))
                # Both time points are consumed by this period.
                i += 2
                continue

            # handle "between {TimePoint} and {TimePoint}"
            if self.config.is_connector_token(middle):
                period_begin = first.start
                period_end = second.start + second.length

                # handle "between"
                before = source[0:period_begin].strip().lower()
                between_index: MatchedIndex = self.config.get_between_token_index(before)
                if between_index.matched:
                    period_begin = between_index.index
                    result.append(Token(period_begin, period_end))
                    i += 2
                    continue

                # handle "between...and..." case when "between" follows the datepoints
                after_str = source[period_end:]
                after_index = self.config.get_between_token_index(after_str)
                if self.config.check_both_before_after and after_index.matched:
                    period_end += after_index.index
                    result.append(Token(period_begin, period_end))

                    # merge two tokens here, increase the index by two
                    i += 2
                    continue

            i += 1

        return result