Python Token Exemples, recognizers_date_time.Token Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : base_date.py Projet : clintwar/Recognizers-Text

    def basic_regex_match(self, source: str) -> list:
        """Return a Token for every accepted date-regex match in ``source``.

        Each pattern in ``self.config.date_regex_list`` is run over ``source``;
        matches rejected by ``self.validate_match`` are skipped.  When
        ``self.config.strict_relative_regex`` matches at the end of the text
        preceding a match (e.g. "this 5/12", "next friday 5/12"), the token
        starts at that relative term instead of at the date itself.

        :param source: text to scan.
        :return: list of ``Token`` objects holding (start, end) offsets into
            ``source``.
        """
        from .utilities import Token
        from .utilities import RegExpUtility
        ret: List[Token] = list()

        for regexp in self.config.date_regex_list:
            for match in regexp.finditer(source):

                # some match might be part of the date range entity, and might be split in a wrong way
                if not self.validate_match(match, source):
                    continue

                # Use the offsets of this very match.  Looking the matched text
                # up again with source.index() would return its first occurrence,
                # which is wrong whenever the same text appears earlier.
                start = match.start()
                end = match.end()

                # Cases where the relative term comes before the detected
                # date entity, like "this 5/12", "next friday 5/12"
                pre_text = source[:start]
                relative_match = RegExpUtility.match_end(self.config.strict_relative_regex, pre_text, True)

                if relative_match and relative_match.success:
                    start = relative_match.index

                ret.append(Token(start, end))

        return ret

Exemple #2

0

Afficher le fichier

Fichier : base_date.py Projet : clintwar/Recognizers-Text

    def extract_relative_duration_date_with_in_prefix(self, text: str, duration_er: [ExtractResult],
                                                      reference: datetime):
        """Extract date tokens formed by a duration joined to an "in" connector.

        Only durations whose text matches ``self.config.date_unit_regex`` are
        considered.  For each of them the text before and after the duration
        is handed to ``self.extract_in_connector``.

        :param text: the full input text.
        :param duration_er: duration extraction results found in ``text``.
        :param reference: reference date; not used by this method.
        :return: the flattened list of tokens produced by
            ``extract_in_connector``.
        """
        from .utilities import Token
        result: [Token] = []

        durations: [Token] = []

        for duration_extraction in duration_er:
            if self.config.date_unit_regex.search(duration_extraction.text):
                # Default start and length separately: "a + b or 0" parses as
                # "(a + b) or 0" and would fail on a missing length.
                start = duration_extraction.start or 0
                durations.append(Token(start, start + (duration_extraction.length or 0)))

        for duration in durations:
            before_str = text[0:duration.start]
            after_str = text[duration.start + duration.length:]

            # Nothing but whitespace (or nothing at all) around the duration:
            # there is no connector to look for.
            if not before_str.strip() and not after_str.strip():
                continue

            # Prefix connector: searched at the end of the text preceding the
            # duration; the text following it is passed as second_str.
            ers, success = self.extract_in_connector(text, before_str, after_str, duration, True)
            result.append(ers)
            if not success and self.config.check_both_before_after:
                # Fall back to a connector that follows the duration.
                # NOTE(review): extract_in_connector uses the connector's index as
                # the token start; in this fallback that index is presumably
                # relative to after_str - verify the resulting offsets.
                ers, success = self.extract_in_connector(text, after_str, before_str, duration, False)
                result.append(ers)

        return flatten(result)

Exemple #3

0

Afficher le fichier

Fichier : base_date.py Projet : clintwar/Recognizers-Text

    def extract_in_connector(self, text, first_str, second_str, duration, in_prefix):
        """Build a token joining an "in" connector with a duration.

        ``self.config.in_connector_regex`` is matched against ``first_str``
        with ``RegExpUtility.match_end`` when ``in_prefix`` is true and with
        ``RegExpUtility.match_begin`` otherwise.  A token is produced only
        when the connector matched and the duration text matches
        ``self.config.range_unit_regex``.  If ``second_str`` matches
        ``self.config.since_year_suffix_regex``, the token end is extended by
        the length of that match.

        :param text: the full input text.
        :param first_str: text in which the connector is searched.
        :param second_str: text checked for a since-year suffix.
        :param duration: token delimiting the duration inside ``text``.
        :param in_prefix: selects ``match_end`` (true) or ``match_begin``.
        :return: ``(tokens, success)`` where ``success`` tells whether the
            connector matched.
        """
        from recognizers_date_time import Token
        result = []

        if in_prefix:
            match = RegExpUtility.match_end(self.config.in_connector_regex, first_str, True)
        else:
            match = RegExpUtility.match_begin(self.config.in_connector_regex, first_str, True)

        success = match.success if match else False
        if not success:
            return result, success

        # NOTE(review): match.index is an offset into first_str.  It is used
        # here as an offset into text, which presumably only holds when
        # first_str is a prefix of text (the in_prefix case) - confirm the
        # expected offsets for in_prefix=False.
        start_token = match.index
        duration_text = text[duration.start: duration.start + duration.length]

        if self.config.range_unit_regex.match(duration_text):
            end_token = duration.end
            since_year_match = self.config.since_year_suffix_regex.match(second_str)

            if since_year_match:
                # Extend by the length of the matched text; len() of the match
                # object itself is not that length.
                end_token += len(since_year_match.group())

            result.append(Token(start_token, end_token))

        return result, success

Exemple #4

0

Afficher le fichier

Fichier : base_date.py Projet : clintwar/Recognizers-Text

    def number_with_month(self, source: str, reference: datetime) -> list:
        """Extract date tokens that are built around a day number.

        Ordinal and integer extraction results between 1 and 31 are combined
        with their surrounding text.  The cases handled, in order, are:

        * a month expression right before the number (``month_end``);
        * "for the 25th" (``for_the_regex``);
        * "Thursday the 21st" (``week_day_and_day_of_month_regex``), kept only
          when the weekday agrees with that day in the reference month;
        * "Monday 21" (``week_day_and_day_regex``);
        * "20th of next month" (``relative_month_regex``);
        * "second Sunday" (``week_day_regex``, ordinals 1 to 5 only);
        * "twenty second of June" (``of_month``).

        :param source: text to scan.
        :param reference: reference date supplying month and year for the
            weekday check and passed on to
            ``extend_with_week_day_and_year``.
        :return: list of ``Token`` objects holding (start, end) offsets into
            ``source``.
        """
        from .utilities import Token
        from .utilities import DateUtils
        ret: List[Token] = list()
        extract_results = self.config.ordinal_extractor.extract(source)
        extract_results.extend(self.config.integer_extractor.extract(source))

        for result in extract_results:
            num = int(self.config.number_parser.parse(result).value)

            if num < 1 or num > 31:
                continue

            if result.start >= 0:
                front_string = source[0:result.start or 0]
                match = regex.search(self.config.month_end, front_string)

                if match is not None:
                    start_index = match.start()
                    result_length = result.length if result.length else 0
                    end_index = match.start() + len(match.group()) + result_length

                    start_index, end_index = self.extend_with_week_day_and_year(
                        start_index, end_index, self.config.month_of_year[str(RegExpUtility.get_group(
                            match, Constants.MONTH_GROUP_NAME)).lower()], num, source, reference)

                    # Use both extended bounds; the start may have been moved too.
                    ret.append(Token(start_index, end_index))
                    continue

                # handling cases like 'for the 25th'
                is_found = False

                for match_case in regex.finditer(self.config.for_the_regex, source):
                    ordinal_num = RegExpUtility.get_group(
                        match_case, Constants.DAY_OF_MONTH)

                    if ordinal_num == result.text:
                        # The text captured by the END group is left out of the token.
                        length = len(
                            RegExpUtility.get_group(match_case, TimeTypeConstants.END))
                        ret.append(Token(match_case.start(),
                                         match_case.end() - length))
                        is_found = True

                if is_found:
                    continue

                # handling cases like 'Thursday the 21st', which both 'Thursday' and '21st' refer to a same date
                for match_case in regex.finditer(
                        self.config.week_day_and_day_of_month_regex, source):
                    ordinal_num = RegExpUtility.get_group(
                        match_case, Constants.DAY_OF_MONTH)

                    if ordinal_num == result.text:
                        month = reference.month
                        year = reference.year

                        # get week of day for the ordinal number which is regarded as a date of reference month
                        date = DateUtils.safe_create_from_min_value(
                            year, month, num)
                        num_week_day_str: str = calendar.day_name[date.weekday()].lower()

                        # get week day from text directly, compare it with the weekday generated above
                        # to see whether they refer to a same week day
                        extracted_week_day_str = RegExpUtility.get_group(
                            match_case, 'weekday').lower()
                        if (date != DateUtils.min_value and
                                self.config.day_of_week[num_week_day_str] ==
                                self.config.day_of_week[extracted_week_day_str]):
                            ret.append(
                                Token(match_case.start(), match_case.end()))
                            is_found = True

                if is_found:
                    continue

                # Handling cases like 'Monday 21', which both 'Monday' and '21' refer to the same date
                # The year of expected date can be different to the year of referenceDate.
                for match_case in regex.finditer(self.config.week_day_and_day_regex, source):
                    # The match qualifies when the number ends exactly where the
                    # match ends, i.e. the span from the match start to the end
                    # of the number equals the match length.
                    match_length = result.start + result.length - match_case.start()

                    if match_length == match_case.end() - match_case.start():
                        ret.append(Token(match_case.start(), match_case.end()))
                        is_found = True

                if is_found:
                    continue

                # handling cases like '20th of next month'
                suffix_str: str = source[result.start + result.length:]
                # Only the whitespace between the number and the following text
                # belongs to the token, so trailing whitespace is not counted.
                space_len = len(suffix_str) - len(suffix_str.lstrip())
                match = regex.match(
                    self.config.relative_month_regex, suffix_str.strip().lower())

                if match is not None and match.start() == 0:
                    res_start = result.start
                    res_end = res_start + result.length + space_len + len(match.group())

                    # Check if prefix contains 'the', include it if any
                    prefix = source[: res_start or 0]
                    prefix_match = self.config.prefix_article_regex.match(prefix)
                    if prefix_match:
                        res_start = prefix_match.start()

                    ret.append(
                        Token(res_start, res_end))

                # handling cases like 'second Sunday'
                match = regex.match(
                    self.config.week_day_regex, suffix_str.strip())
                if (match is not None and match.start() == 0 and 1 <= num <= 5 and
                        result.type == NumberConstants.SYS_NUM_ORDINAL):
                    week_day_str = RegExpUtility.get_group(match, Constants.WEEKDAY_GROUP_NAME).lower()

                    if week_day_str in self.config.day_of_week:
                        ret.append(
                            Token(result.start, result.start + result.length + space_len + len(match.group())))

            # For cases like "I'll go back twenty second of June"
            if result.start + result.length < len(source):
                after_string = source[result.start + result.length:]
                match = regex.match(self.config.of_month, after_string)

                if match is not None:
                    start_index = result.start if result.start else 0
                    result_length = result.length if result.length else 0
                    end_index = (start_index + result_length) + len(match.group())

                    # The helper returns the extended bounds; they must be
                    # assigned, otherwise the extension is silently dropped.
                    start_index, end_index = self.extend_with_week_day_and_year(
                        start_index, end_index,
                        self.config.month_of_year[RegExpUtility.get_group(
                            match, Constants.MONTH_GROUP_NAME).lower() or str(
                            reference.month)], num, source, reference)

                    ret.append(Token(start_index, end_index))

        return ret