def _parse_duration(self, source: str, reference: datetime) -> DateTimeResolutionResult:
        """Resolve a "<prefix><number><unit>" phrase such as "前两年" or "后三年".

        Only the first duration found by the configured duration extractor is
        considered. The text before it, its unit and its number are handed to
        the common duration-with-unit parser.

        Args:
            source: Text to parse.
            reference: Date and time the expression is relative to.

        Returns:
            The result of the common duration-with-unit parser, or a freshly
            created DateTimeResolutionResult when no duration is extracted,
            no unit is matched, or the unit is not in the unit map.
        """
        empty = DateTimeResolutionResult()

        extracted = self.config.duration_extractor.extract(source, reference)
        first_duration = next(iter(extracted), None)
        if not first_duration:
            return empty

        unit_match = regex.search(self.unit_regex, first_duration.text)
        if not unit_match:
            return empty

        unit = RegExpUtility.get_group(unit_match, 'unit').strip().lower()
        if unit not in self.config.unit_map:
            return empty

        # Everything before the duration is the prefix; everything inside the
        # duration before the unit is the number.
        prefix = source[:first_duration.start].strip().lower()
        number_text = first_duration.text[:unit_match.start()].strip().lower()
        count = str(self.__convert_chinese_to_number(number_text))

        return self.__parse_common_duration_with_unit(prefix, unit, count, reference)
 def __init__(self):
     """Create the sub-extractors and compile the patterns kept on this object."""
     to_regex = RegExpUtility.get_safe_reg_exp

     # Sub-extractors for numbers, dates, times and date-times.
     self._cardinal_extractor = ChineseCardinalExtractor()
     self._single_date_extractor = ChineseDateExtractor()
     self._single_time_extractor = ChineseTimeExtractor()
     self._single_date_time_extractor = ChineseDateTimeExtractor()

     # Patterns compiled from the ChineseDateTime definitions.
     self._preposition_regex = to_regex(
         ChineseDateTime.DateTimePeriodPrepositionRegex)
     self._till_regex = to_regex(ChineseDateTime.DateTimePeriodTillRegex)
     self._specific_time_of_day_regex = to_regex(
         ChineseDateTime.SpecificTimeOfDayRegex)
     self._time_of_day_regex = to_regex(ChineseDateTime.TimeOfDayRegex)
     self._followed_unit = to_regex(
         ChineseDateTime.DateTimePeriodFollowedUnit)
     self._time_unit_regex = to_regex(
         ChineseDateTime.DateTimePeriodUnitRegex)
 def __init__(self):
     """Initialise the base class with a ChineseDateTimePeriodParserConfiguration,
     then attach the patterns and number helpers used by this parser."""
     super().__init__(ChineseDateTimePeriodParserConfiguration())
     to_regex = RegExpUtility.get_safe_reg_exp

     # Patterns named after their ChineseDateTime sources (MO, AF, EV, NI).
     self.tmo_regex = to_regex(ChineseDateTime.DateTimePeriodMORegex)
     self.taf_regex = to_regex(ChineseDateTime.DateTimePeriodAFRegex)
     self.tev_regex = to_regex(ChineseDateTime.DateTimePeriodEVRegex)
     self.tni_regex = to_regex(ChineseDateTime.DateTimePeriodNIRegex)
     self.unit_regex = to_regex(ChineseDateTime.DateTimePeriodUnitRegex)
     self.time_of_day_regex = to_regex(ChineseDateTime.TimeOfDayRegex)

     # Number extraction and parsing helpers.
     self.cardinal_extractor = ChineseCardinalExtractor()
     number_config = ChineseNumberParserConfiguration()
     self.cardinal_parser = CJKNumberParser(number_config)
Ejemplo n.º 4
0
 def __init__(self):
     """Compile the patterns and create the sub-extractors kept on this object.

     Several attributes are set to None as placeholders (see the TODO below).
     """
     to_regex = RegExpUtility.get_safe_reg_exp

     # Compiled patterns.
     self._equal_regex = to_regex(BaseDateTime.EqualRegex)
     self._since_suffix_regex = to_regex(
         ChineseDateTime.ParserConfigurationSinceSuffix)
     self._since_prefix_regex = to_regex(
         ChineseDateTime.ParserConfigurationSincePrefix)
     self._until_regex = to_regex(ChineseDateTime.ParserConfigurationUntil)
     self._after_regex = to_regex(ChineseDateTime.AfterRegex)
     self._before_regex = to_regex(ChineseDateTime.BeforeRegex)
     self._ambiguity_filters_dict = ChineseDateTime.AmbiguityFiltersDict

     # Sub-extractors.
     self._date_extractor = ChineseDateExtractor()
     self._time_extractor = ChineseTimeExtractor()
     self._date_time_extractor = ChineseDateTimeExtractor()
     self._date_period_extractor = ChineseDatePeriodExtractor()
     self._time_period_extractor = ChineseTimePeriodExtractor()
     self._date_time_period_extractor = ChineseDateTimePeriodExtractor()
     holiday_config = ChineseHolidayExtractorConfiguration()
     self._holiday_extractor = BaseHolidayExtractor(holiday_config)
     self._duration_extractor = ChineseDurationExtractor()
     self._set_extractor = ChineseSetExtractor()

     # TODO: once these properties are implemented, replace each None below
     # with its real regex or extractor.
     self._superfluous_word_matcher = None
     self._fail_fast_regex = None
     self._unspecified_date_period_regex = None
     self._suffix_after_regex = None
     self._potential_ambiguous_range_regex = None
     self._ambiguous_range_modifier_prefix = None
     self._around_regex = None
     self._term_filter_regexes = None
     self._datetime_alt_extractor = None
     self._time_zone_extractor = None
 def __init__(self):
     """Run the base initialiser, then compile the patterns and create the
     date and time point extractors kept on this object.

     Several attributes are set to None as placeholders (see the TODO below).
     """
     super().__init__()
     to_regex = RegExpUtility.get_safe_reg_exp

     self._datetime_period_unit_regex = to_regex(
         ChineseDateTime.DateTimePeriodUnitRegex)
     self._after_regex = to_regex(ChineseDateTime.AfterRegex)
     self._before_regex = to_regex(ChineseDateTime.BeforeRegex)

     self._date_point_extractor = ChineseDateExtractor()
     self._time_point_extractor = ChineseTimeExtractor()

     self._now_regex = to_regex(ChineseDateTime.NowRegex)
     self._night_regex = to_regex(ChineseDateTime.NightRegex)
     self._time_of_today_before_regex = to_regex(
         ChineseDateTime.TimeOfTodayRegex)
     self._preposition_regex = to_regex(ChineseDateTime.PrepositionRegex)

     # TODO: once these properties are implemented, replace each None below
     # with its real regex.
     self._year_regex = None
     self._year_suffix = None
     self._suffix_after_regex = None
     self._date_number_connector_regex = None
     self._number_as_time_regex = None
 def __init__(self):
     """Compile the date-period patterns and create the date/number helpers."""
     to_regex = RegExpUtility.get_safe_reg_exp

     # Order matters to whoever iterates this list, so it is kept exactly.
     simple_case_patterns = (
         ChineseDateTime.SimpleCasesRegex,
         ChineseDateTime.OneWordPeriodRegex,
         ChineseDateTime.StrictYearRegex,
         ChineseDateTime.YearToYear,
         ChineseDateTime.YearToYearSuffixRequired,
         ChineseDateTime.YearAndMonth,
         ChineseDateTime.PureNumYearAndMonth,
         ChineseDateTime.DatePeriodYearInChineseRegex,
         ChineseDateTime.WeekOfMonthRegex,
         ChineseDateTime.SeasonWithYear,
         ChineseDateTime.QuarterRegex,
     )
     self._simple_cases_regexes = [
         to_regex(pattern) for pattern in simple_case_patterns
     ]

     self._illegal_year_regex = to_regex(BaseDateTime.IllegalYearRegex)
     self._year_regex = to_regex(ChineseDateTime.YearRegex)
     self._till_regex = to_regex(ChineseDateTime.DatePeriodTillRegex)
     self._followed_unit = to_regex(ChineseDateTime.FollowedUnit)
     self._number_combined_with_unit = to_regex(
         ChineseDateTime.NumberCombinedWithUnit)
     self._past_regex = to_regex(ChineseDateTime.PastRegex)
     self._future_regex = to_regex(ChineseDateTime.FutureRegex)

     self._date_point_extractor = ChineseDateExtractor()
     self._integer_extractor = ChineseNumberExtractor()
     number_config = ChineseNumberParserConfiguration()
     self._number_parser = BaseNumberParser(number_config)
Ejemplo n.º 7
0
 def __init__(self):
     """Compile the pattern for text ending in "time" or "timezone"."""
     trailing_time_pattern = "time$|timezone$"
     self._time_zone_end_regex = RegExpUtility.get_safe_reg_exp(
         trailing_time_pattern)
Ejemplo n.º 8
0
    def parse_implicit_date(self, source: str,
                            reference: datetime) -> DateTimeResolutionResult:
        """Resolve a date that is expressed relative to ``reference``.

        The patterns are tried in order and the first one that applies
        decides the result: a day of month with an optional relative
        month/year ("十二日", "本月十一日"), a special day ("today"),
        "this/next/last <weekday>", and finally a bare weekday.

        Args:
            source: Text to parse; surrounding whitespace is ignored.
            reference: Date and time the expression is relative to.

        Returns:
            A DateTimeResolutionResult with ``timex``, ``future_value``,
            ``past_value`` and ``success`` set when a pattern applies;
            otherwise a freshly created DateTimeResolutionResult.
        """
        trimmed_source = source.strip()
        result = DateTimeResolutionResult()

        # handle "十二日" "明年这个月三日" "本月十一日"
        match = regex.match(self.special_date_regex, trimmed_source)
        if match:
            year_str = RegExpUtility.get_group(match, 'thisyear')
            month_str = RegExpUtility.get_group(match, 'thismonth')
            day_str = RegExpUtility.get_group(match, 'day')

            month = reference.month
            day = self.config.day_of_month[day_str]
            year = reference.year

            has_year = False
            has_month = False

            if month_str:
                has_month = True
                # Shift to the next/previous month, rolling the year over at
                # the December/January boundary.
                if regex.search(self.token_next_regex, month_str):
                    month += 1
                    if month == Constants.MAX_MONTH + 1:
                        month = Constants.MIN_MONTH
                        year += 1
                elif regex.search(self.token_last_regex, month_str):
                    month -= 1
                    if month == Constants.MIN_MONTH - 1:
                        month = Constants.MAX_MONTH
                        year -= 1

                # A relative year is only honoured together with a month.
                if year_str:
                    has_year = True
                    if regex.search(self.token_next_regex, year_str):
                        year += 1
                    elif regex.search(self.token_last_regex, year_str):
                        year -= 1

            # -1 marks a component that was not given in the text.
            result.timex = DateTimeFormatUtil.luis_date(
                year if has_year else -1, month if has_month else -1, day)

            if day > self.get_month_max_day(year, month):
                # The day does not exist in the target month: try the
                # neighbouring months instead.
                future_month = month + 1
                past_month = month - 1
                future_year = year
                past_year = year

                if future_month == Constants.MAX_MONTH + 1:
                    future_month = Constants.MIN_MONTH
                    future_year = year + 1

                if past_month == Constants.MIN_MONTH - 1:
                    past_month = Constants.MAX_MONTH
                    past_year = year - 1

                is_future_valid = DateUtils.is_valid_date(
                    future_year, future_month, day)
                is_past_valid = DateUtils.is_valid_date(
                    past_year, past_month, day)

                if is_future_valid and is_past_valid:
                    future_date = DateUtils.safe_create_from_min_value(
                        future_year, future_month, day)
                    past_date = DateUtils.safe_create_from_min_value(
                        past_year, past_month, day)
                elif is_future_valid:
                    # Only the following month has this day.
                    future_date = past_date = DateUtils.safe_create_from_min_value(
                        future_year, future_month, day)
                elif is_past_valid:
                    # Only the preceding month has this day. This branch
                    # used to test "not is_past_valid", which built the date
                    # from a month already known to be invalid.
                    future_date = past_date = DateUtils.safe_create_from_min_value(
                        past_year, past_month, day)
                else:
                    # Neither neighbour has this day: fall back to the
                    # requested year/month/day as given.
                    future_date = past_date = DateUtils.safe_create_from_min_value(
                        year, month, day)
            else:
                future_date = DateUtils.safe_create_from_min_value(
                    year, month, day)
                past_date = DateUtils.safe_create_from_min_value(
                    year, month, day)

                if not has_month:
                    # Only a day was given: move by whole months so that the
                    # future value is not before the reference and the past
                    # value is before it.
                    if future_date < reference:
                        if self.is_valid_date(year, month + 1, day):
                            future_date += datedelta(months=1)
                    if past_date >= reference:
                        if self.is_valid_date(year, month - 1, day):
                            past_date += datedelta(months=-1)
                        elif DateUtils.is_Feb_29th(year, month - 1, day):
                            past_date += datedelta(months=-2)
                elif not has_year:
                    # Month given without a year: move by whole years.
                    if future_date < reference:
                        if self.is_valid_date(year + 1, month, day):
                            future_date += datedelta(years=1)
                    if past_date >= reference:
                        if self.is_valid_date(year - 1, month, day):
                            past_date += datedelta(years=-1)

            result.future_value = future_date
            result.past_value = past_date
            result.success = True

            return result

        # handle "today", "the day before yesterday"
        match = regex.match(self.config.special_day_regex, trimmed_source)
        if match and match.start() == 0 and len(
                match.group()) == len(trimmed_source):
            swift = self.config.get_swift_day(match.group())
            value = reference + timedelta(days=swift)

            result.timex = DateTimeFormatUtil.luis_date_from_datetime(value)
            result.future_value = result.past_value = DateUtils.safe_create_from_min_value(
                value.year, value.month, value.day)
            result.success = True
            return result

        # handle "this Friday"
        match = regex.match(self.config.this_regex, trimmed_source)
        if match and match.start() == 0 and len(
                match.group()) == len(trimmed_source):
            weekday_str = RegExpUtility.get_group(match, 'weekday')
            value = DateUtils.this(reference,
                                   self.config.day_of_week.get(weekday_str))

            result.timex = DateTimeFormatUtil.luis_date_from_datetime(value)
            result.future_value = value
            result.past_value = value
            result.success = True
            return result

        # handle "next Sunday"
        match = regex.match(self.config.next_regex, trimmed_source)
        if match and match.start() == 0 and len(
                match.group()) == len(trimmed_source):
            weekday_str = RegExpUtility.get_group(match, 'weekday')
            value = DateUtils.next(reference,
                                   self.config.day_of_week.get(weekday_str))

            result.timex = DateTimeFormatUtil.luis_date_from_datetime(value)
            result.future_value = value
            result.past_value = value
            result.success = True
            return result

        # handle "last Friday", "last mon"
        match = regex.match(self.config.last_regex, trimmed_source)
        if match and match.start() == 0 and len(
                match.group()) == len(trimmed_source):
            weekday_str = RegExpUtility.get_group(match, 'weekday')
            value = DateUtils.last(reference,
                                   self.config.day_of_week.get(weekday_str))

            result.timex = DateTimeFormatUtil.luis_date_from_datetime(value)
            result.future_value = value
            result.past_value = value
            result.success = True
            return result

        # handle "Friday"
        match = regex.match(self.config.week_day_regex, trimmed_source)
        if match and match.start() == 0 and len(
                match.group()) == len(trimmed_source):
            weekday_str = RegExpUtility.get_group(match, 'weekday')
            weekday = self.config.day_of_week.get(weekday_str)
            value = DateUtils.this(reference, weekday)

            # isoweekday() runs 1..7, so a weekday stored as 0 is compared
            # (and written to the timex) as 7.
            if weekday == 0:
                weekday = 7

            if weekday < reference.isoweekday():
                value = DateUtils.next(reference, weekday)

            result.timex = 'XXXX-WXX-' + str(weekday)
            future_date = value
            past_date = value

            if future_date < reference:
                future_date += timedelta(weeks=1)

            if past_date >= reference:
                past_date -= timedelta(weeks=1)

            result.future_value = future_date
            result.past_value = past_date
            result.success = True
            return result

        return result
Ejemplo n.º 9
0
    def __init__(self):
        """Compile the date patterns and load the lookup tables and number
        helpers kept on this object."""
        to_regex = RegExpUtility.get_safe_reg_exp

        # DateRegexList6 and DateRegexList7 swap places depending on the
        # default language fallback; 1-5 always come first and 8 always last.
        if ChineseDateTime.DefaultLanguageFallback == Constants.DEFAULT_LANGUAGE_FALLBACK_DMY:
            fallback_dependent = (ChineseDateTime.DateRegexList7,
                                  ChineseDateTime.DateRegexList6)
        else:
            fallback_dependent = (ChineseDateTime.DateRegexList6,
                                  ChineseDateTime.DateRegexList7)

        ordered_patterns = (
            ChineseDateTime.DateRegexList1,
            ChineseDateTime.DateRegexList2,
            ChineseDateTime.DateRegexList3,
            ChineseDateTime.DateRegexList4,
            ChineseDateTime.DateRegexList5,
            *fallback_dependent,
            ChineseDateTime.DateRegexList8,
        )
        self._date_regex = [to_regex(pattern) for pattern in ordered_patterns]

        # Lookup tables taken directly from the ChineseDateTime definitions.
        self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
        self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
        self._day_of_week = ChineseDateTime.ParserConfigurationDayOfWeek

        self._special_day_regex = to_regex(ChineseDateTime.SpecialDayRegex)
        self._special_day_with_num_regex = to_regex(
            ChineseDateTime.SpecialDayWithNumRegex)
        self._this_regex = to_regex(ChineseDateTime.DateThisRegex)
        self._next_regex = to_regex(ChineseDateTime.DateNextRegex)
        self._last_regex = to_regex(ChineseDateTime.DateLastRegex)
        self._unit_regex = to_regex(ChineseDateTime.DateUnitRegex)
        self._unit_map = ChineseDateTime.ParserConfigurationUnitMap
        self._week_day_of_month_regex = to_regex(
            ChineseDateTime.WeekDayOfMonthRegex)
        self._week_day_regex = to_regex(ChineseDateTime.WeekDayRegex)

        # Number extraction and parsing helpers.
        self._integer_extractor = ChineseIntegerExtractor()
        number_config = ChineseNumberParserConfiguration()
        self._number_parser = CJKNumberParser(number_config)
Ejemplo n.º 10
0
    def _parse_year_to_year(self, source: str,
                            reference: datetime) -> DateTimeResolutionResult:
        """Resolve a "<year> to <year>" range into a period.

        The text must match either the year-to-year pattern or its
        suffix-required variant. The two years are then read from the year
        pattern matches and/or the Chinese-year pattern matches found in
        ``source``; if no supported combination of matches is found, both
        years start from 0 before being sanitized.

        Args:
            source: Text to parse.
            reference: Reference date and time (not used by this method).

        Returns:
            A DateTimeResolutionResult whose future and past values are both
            ``[Jan 1 of begin year, Jan 1 of end year]``, or a freshly
            created DateTimeResolutionResult when neither pattern matches.
        """
        outcome = DateTimeResolutionResult()

        range_match = (regex.search(self.year_to_year_regex, source)
                       or regex.search(self.year_to_year_suffix_required, source))
        if not range_match:
            return outcome

        numeric_hits = list(regex.finditer(self.config.year_regex, source))
        chinese_hits = list(regex.finditer(self.chinese_year_regex, source))

        def number_from(hit, group_name):
            # Read the named group and run it through the number converter.
            return self.__convert_chinese_to_number(
                RegExpUtility.get_group(hit, group_name))

        def chinese_year_from(hit):
            # Read the Chinese-year group and run it through the year converter.
            return self._convert_year(
                RegExpUtility.get_group(hit, Constants.YEAR_CHINESE), True)

        first_year = 0
        last_year = 0

        if len(numeric_hits) == 2:
            first_year = number_from(numeric_hits[0], Constants.YEAR_GROUP_NAME)
            last_year = number_from(numeric_hits[1], Constants.YEAR_GROUP_NAME)
        elif len(chinese_hits) == 2:
            first_year = chinese_year_from(chinese_hits[0])
            last_year = chinese_year_from(chinese_hits[1])
        elif len(numeric_hits) == 1 and len(chinese_hits) == 1:
            numeric_hit = numeric_hits[0]
            chinese_hit = chinese_hits[0]
            # Whichever match starts earlier in the text is the begin year.
            if numeric_hit.start() < chinese_hit.start():
                first_year = number_from(numeric_hit, Constants.YEAR_GROUP_NAME)
                last_year = number_from(chinese_hit, Constants.YEAR_CHINESE)
            else:
                first_year = number_from(chinese_hit, Constants.YEAR_CHINESE)
                last_year = number_from(numeric_hit, Constants.YEAR_GROUP_NAME)

        first_year = self.__sanitize_year(first_year)
        last_year = self.__sanitize_year(last_year)

        range_start = DateUtils.safe_create_from_min_value(first_year, 1, 1)
        range_end = DateUtils.safe_create_from_min_value(last_year, 1, 1)
        outcome.future_value = [range_start, range_end]
        outcome.past_value = [range_start, range_end]

        start_timex = DateTimeFormatUtil.luis_date_from_datetime(range_start)
        end_timex = DateTimeFormatUtil.luis_date_from_datetime(range_end)
        outcome.timex = f'({start_timex},{end_timex},P{last_year - first_year}Y)'

        outcome.success = True
        return outcome
Ejemplo n.º 11
0
    def _parse_simple_cases(self, source: str,
                            reference: datetime) -> DateTimeResolutionResult:
        """Resolve a "<month> <day> to <day>" style range within one month.

        The whole of ``source`` must match the simple-cases pattern. The
        month comes from an explicit month group or, failing that, from a
        relative-month group applied to the reference month. The year comes
        from an explicit year group or from the reference.

        Args:
            source: Text to parse; it must match the pattern in full.
            reference: Date and time the expression is relative to.

        Returns:
            A DateTimeResolutionResult whose future and past values are
            ``[begin, end]`` pairs taken from DateUtils.generate_dates, or a
            freshly created DateTimeResolutionResult when ``source`` does not
            match in full.
        """
        result = DateTimeResolutionResult()
        year = reference.year
        month = reference.month
        no_year = False
        input_year = False

        match = regex.search(self.config.simple_cases_regex, source)

        if not match or match.start() != 0 or len(
                match.group()) != len(source):
            return result

        days = RegExpUtility.get_group_list(match, Constants.DAY_GROUP_NAME)
        begin_day = self.config.day_of_month[days[0]]
        end_day = self.config.day_of_month[days[1]]

        month_str = RegExpUtility.get_group(match, Constants.MONTH_GROUP_NAME)

        if month_str.strip() != '':
            month = self.config.month_of_year[month_str]
        else:
            month_str = RegExpUtility.get_group(match, Constants.REL_MONTH)
            month += self.config.get_swift_day_or_month(month_str)

            # Months are 1-based (reference.month is 1..12), so wrap into the
            # neighbouring year when the shift leaves that range. The old
            # 0..11 bounds turned an unshifted December into November of the
            # next year and left January - 1 at month 0.
            if month < 1:
                month += 12
                year -= 1
            elif month > 12:
                month -= 12
                year += 1

        year_str = RegExpUtility.get_group(match, Constants.YEAR_GROUP_NAME)
        if year_str.strip() != '':
            year = int(year_str)
            input_year = True
        else:
            no_year = True

        # The timex carries the year only when it was given or when the
        # month expression is a future one; -1 marks it as unspecified.
        timex_year = year if input_year or self.config.is_future(
            month_str) else -1
        begin_date_luis = DateTimeFormatUtil.luis_date(
            timex_year, month, begin_day)
        end_date_luis = DateTimeFormatUtil.luis_date(
            timex_year, month, end_day)

        future_past_begin_date = DateUtils.generate_dates(
            no_year, reference, year, month, begin_day)
        future_past_end_date = DateUtils.generate_dates(
            no_year, reference, year, month, end_day)

        result.timex = f'({begin_date_luis},{end_date_luis},P{end_day - begin_day}D)'

        # generate_dates results are indexed as [0] = future, [1] = past.
        result.future_value = [
            future_past_begin_date[0], future_past_end_date[0]
        ]
        result.past_value = [
            future_past_begin_date[1], future_past_end_date[1]
        ]
        result.success = True
        return result
    def parse_specific_time_of_day(
            self, source: str,
            reference: datetime) -> DateTimeResolutionResult:
        """Resolve a part-of-day expression into a begin/end date-time pair.

        Two forms are handled: a specific time of day that matches the whole
        text (e.g. 昨晚, 今晨), and a date followed by a part of day
        (morning, afternoon, ...).

        Args:
            source: Text to parse.
            reference: Date and time the expression is relative to.

        Returns:
            A DateTimeResolutionResult with ``timex``, ``future_value`` and
            ``past_value`` (begin/end pairs) and ``success`` set when one of
            the forms applies; otherwise a freshly created
            DateTimeResolutionResult.
        """
        result = DateTimeResolutionResult()
        trimmed_source = source.strip()
        begin_hour = end_hour = end_min = 0

        # Handle 昨晚,今晨
        if RegExpUtility.is_exact_match(self.config.specific_time_of_day_regex,
                                        trimmed_source, True):
            values = self.config.get_matched_time_range(source)
            if not values:
                return result

            swift = values.swift
            date = reference.date() + timedelta(days=swift)
            day = date.day
            month = date.month
            year = date.year

            result.timex = DateTimeFormatUtil.format_date(
                date) + values.time_str
            # end_min is passed for both the minute and the second of the
            # end point.
            result.future_value = result.past_value = [
                DateUtils.safe_create_from_min_value(year, month, day,
                                                     values.begin_hour, 0, 0),
                DateUtils.safe_create_from_min_value(year, month, day,
                                                     values.end_hour,
                                                     values.end_min,
                                                     values.end_min)
            ]

            result.success = True
            return result

        # handle morning, afternoon..
        if regex.search(self.tmo_regex, source):
            time_str = 'TMO'
            begin_hour = 8
            end_hour = 12
        elif regex.search(self.tmi_regex, source):
            time_str = 'TMI'
            begin_hour = 11
            end_hour = 13
        elif regex.search(self.taf_regex, source):
            time_str = 'TAF'
            begin_hour = Constants.HALF_DAY_HOUR_COUNT
            end_hour = 16
        elif regex.search(self.tev_regex, source):
            time_str = 'TEV'
            begin_hour = 16
            end_hour = 20
        elif regex.search(self.tni_regex, source):
            time_str = 'TNI'
            begin_hour = 20
            end_hour = 23
            end_min = 59
        else:
            return result

        # A second "exact match of specific_time_of_day_regex" branch used to
        # sit here. It could never run: the identical check at the top of the
        # method always returns when it succeeds, so the branch was removed.

        # handle Date followed by morning, afternoon
        match = regex.search(self.config.time_of_day_regex, trimmed_source)
        if match:
            before_str = trimmed_source[0:match.start()].strip()
            extracted_results = self.single_date_extractor.extract(
                before_str, reference)

            # The text before the part of day must be exactly one date.
            if len(extracted_results
                   ) == 0 or extracted_results[0].length != len(before_str):
                return result

            parse_result = self.config.date_parser.parse(
                extracted_results[0], reference)
            future_date = parse_result.value.future_value
            past_date = parse_result.value.past_value

            result.timex = parse_result.timex_str + time_str

            result.future_value = (DateUtils.safe_create_from_min_value(
                future_date.year, future_date.month, future_date.day,
                begin_hour, 0, 0),
                                   DateUtils.safe_create_from_min_value(
                                       future_date.year, future_date.month,
                                       future_date.day, end_hour, end_min,
                                       end_min))

            result.past_value = (DateUtils.safe_create_from_min_value(
                past_date.year, past_date.month, past_date.day, begin_hour, 0,
                0),
                                 DateUtils.safe_create_from_min_value(
                                     past_date.year, past_date.month,
                                     past_date.day, end_hour, end_min,
                                     end_min))

            result.success = True
            return result

        return result
Ejemplo n.º 13
0
    def _parse_simple_cases(self, source: str,
                            reference: datetime) -> DateTimeResolutionResult:
        """Resolve a "<month> <day> to <day>" style range within one month.

        The whole of ``source`` must match the simple-cases pattern. The
        month comes from an explicit month group or, failing that, from a
        relative-month group applied to the reference month. The year comes
        from an explicit year group or from the reference.

        Args:
            source: Text to parse; it must match the pattern in full.
            reference: Date and time the expression is relative to.

        Returns:
            A DateTimeResolutionResult whose future and past values are
            ``[begin, end]`` date pairs, or a freshly created
            DateTimeResolutionResult when ``source`` does not match in full.
        """
        result = DateTimeResolutionResult()
        year = reference.year
        month = reference.month
        no_year = False
        input_year = False

        match = regex.search(self.config.simple_cases_regex, source)

        if not match or match.start() != 0 or len(
                match.group()) != len(source):
            return result

        days = RegExpUtility.get_group_list(match, 'day')
        begin_day = self.config.day_of_month[days[0]]
        end_day = self.config.day_of_month[days[1]]

        month_str = RegExpUtility.get_group(match, 'month')

        if month_str.strip() != '':
            month = self.config.month_of_year[month_str]
        else:
            month_str = RegExpUtility.get_group(match, 'relmonth')
            month += self.config.get_swift_day_or_month(month_str)

            # Months are 1-based (reference.month is 1..12), so wrap into the
            # neighbouring year when the shift leaves that range. The old
            # 0..11 bounds turned an unshifted December into November of the
            # next year and left January - 1 at month 0.
            if month < 1:
                month += 12
                year -= 1
            elif month > 12:
                month -= 12
                year += 1

        year_str = RegExpUtility.get_group(match, 'year')
        if year_str.strip() != '':
            year = int(year_str)
            input_year = True
        else:
            no_year = True

        # The timex carries the year only when it was given or when the
        # month expression is a future one; -1 marks it as unspecified.
        timex_year = year if input_year or self.config.is_future(
            month_str) else -1
        begin_date_luis = DateTimeFormatUtil.luis_date(
            timex_year, month, begin_day)
        end_date_luis = DateTimeFormatUtil.luis_date(
            timex_year, month, end_day)

        future_year = year
        past_year = year

        start_date = DateUtils.safe_create_from_min_value(
            year, month, begin_day)

        # Without an explicit year, push the future range to next year when
        # it would start before the reference, and pull the past range to
        # last year when it would not.
        if no_year and start_date < reference:
            future_year += 1

        if no_year and start_date >= reference:
            past_year -= 1

        result.timex = f'({begin_date_luis},{end_date_luis},P{end_day - begin_day}D)'

        result.future_value = [
            DateUtils.safe_create_from_min_value(future_year, month,
                                                 begin_day),
            DateUtils.safe_create_from_min_value(future_year, month, end_day)
        ]
        result.past_value = [
            DateUtils.safe_create_from_min_value(past_year, month, begin_day),
            DateUtils.safe_create_from_min_value(past_year, month, end_day)
        ]

        result.success = True
        return result
Ejemplo n.º 14
0
 def __init__(self):
     """Initialise the regex attributes from the shared pattern definitions.

     Every pattern is taken from ``ChineseDateTime`` (one from
     ``BaseDateTime``) and passed through ``RegExpUtility.get_safe_reg_exp``.
     """
     compile_re = RegExpUtility.get_safe_reg_exp

     # Units and before/after markers.
     self._datetime_period_unit_regex = compile_re(
         ChineseDateTime.DateTimePeriodUnitRegex)
     self._after_regex = compile_re(ChineseDateTime.AfterRegex)
     self._before_regex = compile_re(ChineseDateTime.BeforeRegex)
     self._date_unit_regex = compile_re(ChineseDateTime.UnitRegex)

     # next / last / this prefixes.
     self._next_prefix_regex = compile_re(ChineseDateTime.NextPrefixRegex)
     self._last_prefix_regex = compile_re(ChineseDateTime.LastPrefixRegex)
     self._this_prefix_regex = compile_re(ChineseDateTime.ThisPrefixRegex)

     # Year / month / day building blocks.
     self._date_year_in_chinese_regex = compile_re(
         ChineseDateTime.DateYearInCJKRegex)
     self._zero_to_nine_integer_regex_chinese = compile_re(
         ChineseDateTime.ZeroToNineIntegerRegexCJK)
     self._relative_regex = compile_re(ChineseDateTime.RelativeRegex)
     self._year_regex = compile_re(ChineseDateTime.YearRegex)
     self._month_num_regex = compile_re(ChineseDateTime.MonthNumRegex)
     self._day_regex_num_in_chinese = compile_re(
         ChineseDateTime.DayRegexNumInCJK)
     self._date_day_regex_in_chinese = compile_re(
         ChineseDateTime.DateDayRegexInCJK)
     self._day_regex = compile_re(ChineseDateTime.DayRegex)
     self._month_regex = compile_re(ChineseDateTime.MonthRegex)

     # Explicit date formats, in list order 1..8.
     self._date_regex_list = [
         compile_re(pattern) for pattern in (
             ChineseDateTime.DateRegexList1,
             ChineseDateTime.DateRegexList2,
             ChineseDateTime.DateRegexList3,
             ChineseDateTime.DateRegexList4,
             ChineseDateTime.DateRegexList5,
             ChineseDateTime.DateRegexList6,
             ChineseDateTime.DateRegexList7,
             ChineseDateTime.DateRegexList8,
         )
     ]
     # Implicit date expressions (lunar, special days, weekdays, ...).
     self._implicit_date_list = [
         compile_re(pattern) for pattern in (
             ChineseDateTime.LunarRegex,
             ChineseDateTime.SpecialDayRegex,
             ChineseDateTime.DateThisRegex,
             ChineseDateTime.DateLastRegex,
             ChineseDateTime.DateNextRegex,
             ChineseDateTime.WeekDayRegex,
             ChineseDateTime.WeekDayOfMonthRegex,
             ChineseDateTime.SpecialDate,
         )
     ]
     self._range_connector_symbol_regex = compile_re(
         BaseDateTime.RangeConnectorSymbolRegex)
     self._check_both_before_after = False

     # TODO When the implementation for these properties is added, change the None values to their respective Regexps
     self._since_year_suffix_regex = None
     self._range_unit_regex = None
     self._in_connector_regex = None
     self._less_than_regex = None
     self._more_than_regex = None
     self._year_suffix = None
     self._month_of_year = None
     self._prefix_article_regex = None
     self._week_day_and_day_regex = None
Ejemplo n.º 15
0
    def parse_implicit_date(self, source: str,
                            reference: datetime) -> DateTimeParseResult:
        """Resolve an implicit date expression against ``reference``.

        The trimmed text is tested, in order, against: a bare day of month
        ("on 12"), special days ("today"), "this/next/last <weekday>", and a
        bare weekday. The first pattern covering the whole text wins.

        Args:
            source: Text of the date expression.
            reference: Date the expression is resolved against.

        Returns:
            The resolution result with ``timex``, ``future_value``,
            ``past_value`` and ``success`` set on a match; otherwise the
            freshly constructed result is returned untouched.
            NOTE(review): the annotation names ``DateTimeParseResult`` but the
            object built here is a ``DateTimeResolutionResult`` - confirm.
        """
        trimmed_source = source.strip()
        result = DateTimeResolutionResult()

        # handle "on 12"
        match = regex.search(self.special_date_regex, trimmed_source)
        if match and len(match.group()) == len(trimmed_source):
            month = reference.month
            year = reference.year
            year_str = RegExpUtility.get_group(match, 'thisyear')
            month_str = RegExpUtility.get_group(match, 'thismonth')
            day_str = RegExpUtility.get_group(match, 'day')
            day = self.config.day_of_month.get(day_str, -1)

            has_year = year_str.strip() != ''
            has_month = month_str.strip() != ''

            if has_month:
                # datetime months run 1..12, so the wrap-around sentinels are
                # 13 (past December) and 0 (before January).
                if regex.search(self.token_next_regex, month_str):
                    month += 1
                    if month == 13:
                        month = 1
                        year += 1
                elif regex.search(self.token_last_regex, month_str):
                    month -= 1
                    if month == 0:
                        month = 12
                        year -= 1

                if has_year:
                    if regex.search(self.token_next_regex, year_str):
                        year += 1
                    elif regex.search(self.token_last_regex, year_str):
                        year -= 1

            result.timex = FormatUtil.luis_date(year if has_year else -1,
                                                month if has_month else -1,
                                                day)

            future_date: datetime
            past_date: datetime

            # NOTE(review): month is 1-based here; confirm month_max_days is
            # indexed the same way (a 12-element, 0-based table would need
            # ``month - 1``).
            if day > self.month_max_days[month]:
                # The day does not exist in this month: fall back to the
                # neighbouring months.
                future_date = DateUtils.safe_create_from_min_value(
                    year, month + 1, day)
                past_date = DateUtils.safe_create_from_min_value(
                    year, month - 1, day)
            else:
                future_date = DateUtils.safe_create_from_min_value(
                    year, month, day)
                past_date = DateUtils.safe_create_from_min_value(
                    year, month, day)

                if not has_month:
                    # Only a day was given: shift by a month to land on the
                    # correct side of the reference date.
                    if future_date < reference:
                        future_date += datedelta(months=1)
                    if past_date >= reference:
                        past_date += datedelta(months=-1)
                elif not has_year:
                    # Month given without a year: shift by a year instead.
                    if future_date < reference:
                        future_date += datedelta(years=1)
                    if past_date >= reference:
                        past_date += datedelta(years=-1)

            result.future_value = future_date
            result.past_value = past_date
            result.success = True
            return result

        # handle "today", "the day before yesterday"
        match = regex.match(self.config.special_day_regex, trimmed_source)
        if match and match.start() == 0 and len(
                match.group()) == len(trimmed_source):
            swift = self.config.get_swift_day(match.group())
            value = reference + timedelta(days=swift)

            result.timex = FormatUtil.luis_date_from_datetime(value)
            result.future_value = value
            result.past_value = value
            result.success = True
            return result

        # handle "this Friday", "next Sunday", "last Friday" / "last mon";
        # the three cases differ only in the pattern and the DateUtils helper.
        relative_weekday_rules = (
            (self.config.this_regex, DateUtils.this),
            (self.config.next_regex, DateUtils.next),
            (self.config.last_regex, DateUtils.last),
        )
        for pattern, resolve_weekday in relative_weekday_rules:
            match = regex.match(pattern, trimmed_source)
            if match and match.start() == 0 and len(
                    match.group()) == len(trimmed_source):
                weekday_str = RegExpUtility.get_group(match, 'weekday')
                value = resolve_weekday(
                    reference, self.config.day_of_week.get(weekday_str))

                result.timex = FormatUtil.luis_date_from_datetime(value)
                result.future_value = value
                result.past_value = value
                result.success = True
                return result

        # handle "Friday"
        match = regex.match(self.config.week_day_regex, trimmed_source)
        if match and match.start() == 0 and len(
                match.group()) == len(trimmed_source):
            weekday_str = RegExpUtility.get_group(match, 'weekday')
            weekday = self.config.day_of_week.get(weekday_str)
            value = DateUtils.this(reference, weekday)

            # Map 0 to 7 so the value lines up with isoweekday() numbering.
            if weekday == 0:
                weekday = 7

            if weekday < reference.isoweekday():
                value = DateUtils.next(reference, weekday)

            result.timex = 'XXXX-WXX-' + str(weekday)
            future_date = value
            past_date = value

            if future_date < reference:
                future_date += timedelta(weeks=1)

            if past_date >= reference:
                past_date -= timedelta(weeks=1)

            result.future_value = future_date
            result.past_value = past_date
            result.success = True
            return result

        return result
Ejemplo n.º 16
0
    def __init__(self):
        """Initialise date regexes, lookup maps and number helpers.

        The order of the date-format regexes matters: the first four are
        fixed, the rest depend on ``ChineseDateTime.DefaultLanguageFallback``.
        """
        compile_re = RegExpUtility.get_safe_reg_exp
        fallback = ChineseDateTime.DefaultLanguageFallback

        fixed_patterns = [
            ChineseDateTime.DateRegexList1,
            ChineseDateTime.DateRegexList2,
            ChineseDateTime.DateRegexList3,
            # 2015-12-23 - This regex represents the standard format in Chinese dates (YMD) and has precedence over other orderings
            ChineseDateTime.DateRegexList8,
        ]

        # Regex precedence where the order between D and M varies is controlled by DefaultLanguageFallback
        day_month_patterns = (
            [ChineseDateTime.DateRegexList5, ChineseDateTime.DateRegexList4]
            if fallback == Constants.DEFAULT_LANGUAGE_FALLBACK_DMY
            else [ChineseDateTime.DateRegexList4, ChineseDateTime.DateRegexList5]
        )
        trailing_patterns = (
            [ChineseDateTime.DateRegexList7, ChineseDateTime.DateRegexList6]
            if fallback in (Constants.DEFAULT_LANGUAGE_FALLBACK_DMY,
                            Constants.DEFAULT_LANGUAGE_FALLBACK_YMD)
            else [ChineseDateTime.DateRegexList6, ChineseDateTime.DateRegexList7]
        )

        self._date_regex = [
            compile_re(pattern)
            for pattern in fixed_patterns + day_month_patterns + trailing_patterns
        ]

        # Lookup tables.
        self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
        self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
        self._day_of_week = ChineseDateTime.ParserConfigurationDayOfWeek

        # Special-day and relative-date regexes.
        self._special_day_regex = compile_re(ChineseDateTime.SpecialDayRegex)
        self._special_day_with_num_regex = compile_re(
            ChineseDateTime.SpecialDayWithNumRegex)
        self._this_regex = compile_re(ChineseDateTime.DateThisRegex)
        self._next_regex = compile_re(ChineseDateTime.DateNextRegex)
        self._last_regex = compile_re(ChineseDateTime.DateLastRegex)

        self._unit_regex = compile_re(ChineseDateTime.DateUnitRegex)
        self._unit_map = ChineseDateTime.ParserConfigurationUnitMap

        self._week_day_of_month_regex = compile_re(
            ChineseDateTime.WeekDayOfMonthRegex)
        self._week_day_regex = compile_re(ChineseDateTime.WeekDayRegex)

        self._dynasty_year_regex = compile_re(ChineseDateTime.DynastyYearRegex)
        self._dynasty_year_map = ChineseDateTime.DynastyYearMap

        # Number extraction / parsing helpers.
        self._integer_extractor = ChineseIntegerExtractor()
        self._number_parser = CJKNumberParser(
            ChineseNumberParserConfiguration())

        self._date_extractor = None
        self._dynasty_start_year = ChineseDateTime.DynastyStartYear
Ejemplo n.º 17
0
 def __init__(self):
     """Initialise regex and sub-extractor attributes after the base class.

     Patterns come from ``ChineseDateTime`` and go through
     ``RegExpUtility.get_safe_reg_exp``; the trailing attributes are
     placeholders set to ``None``.
     """
     super().__init__()
     compile_re = RegExpUtility.get_safe_reg_exp

     # Past / future markers and number-with-unit pattern.
     self._past_regex = compile_re(ChineseDateTime.PastRegex)
     self._future_regex = compile_re(ChineseDateTime.FutureRegex)
     self._number_combined_with_unit = compile_re(
         ChineseDateTime.NumberCombinedWithUnit)

     # this / last / next date patterns.
     self._this_regex = compile_re(ChineseDateTime.DateThisRegex)
     self._last_regex = compile_re(ChineseDateTime.DateLastRegex)
     self._next_regex = compile_re(ChineseDateTime.DateNextRegex)

     self._zhijian_regex = compile_re(ChineseDateTime.ZhijianRegex)
     self._hour_num_regex = compile_re(ChineseDateTime.HourNumRegex)
     self._hour_regex = compile_re(ChineseDateTime.HourRegex)

     # Sub-extractors.
     self._cardinal_extractor = ChineseCardinalExtractor()
     self._single_date_extractor = ChineseDateExtractor()
     self._single_time_extractor = ChineseTimeExtractor()
     self._single_date_time_extractor = ChineseDateTimeExtractor()

     # Connectors, time-of-day and unit patterns.
     self._preposition_regex = compile_re(
         ChineseDateTime.DateTimePeriodPrepositionRegex)
     self._till_regex = compile_re(ChineseDateTime.DateTimePeriodTillRegex)
     self._specific_time_of_day_regex = compile_re(
         ChineseDateTime.SpecificTimeOfDayRegex)
     self._time_of_day_regex = compile_re(ChineseDateTime.TimeOfDayRegex)
     self._followed_unit = compile_re(
         ChineseDateTime.DateTimePeriodFollowedUnit)
     self._time_unit_regex = compile_re(
         ChineseDateTime.DateTimePeriodUnitRegex)

     # TODO When the implementation for these properties is added, change the None values to their respective Regexps
     self._check_both_before_after = None
     self._suffix_regex = None
     self._after_regex = None
     self._before_regex = None
     self._prefix_day_regex = None
     self._pm_desc_regex = None
     self._am_desc_regex = None
     self._date_unit_regex = None
     self._future_suffix_regex = None
     self._within_next_prefix_regex = None
     self._token_before_date = None
Ejemplo n.º 18
0
 def __init__(self, regex_true, regex_false, token_regex, only_top_match):
     """Store the three patterns (via ``get_safe_reg_exp``) and the flag.

     Args:
         regex_true: Pattern stored as ``self.regex_true``.
         regex_false: Pattern stored as ``self.regex_false``.
         token_regex: Pattern stored as ``self.token_regex``.
         only_top_match: Stored unchanged as ``self.only_top_match``.
     """
     compile_re = RegExpUtility.get_safe_reg_exp

     self.regex_true = compile_re(regex_true)
     self.regex_false = compile_re(regex_false)
     self.token_regex = compile_re(token_regex)
     self.only_top_match = only_top_match
Ejemplo n.º 19
0
 def __init__(self, options: DateTimeOptions):
     """Initialise the base class with the Chinese merged configuration.

     Args:
         options: Passed straight through to the base-class initialiser.
     """
     super().__init__(ChineseMergedExtractorConfiguration(), options)

     # One or two digits at the start of the text, followed by 号.
     day_of_month_pattern = '^\\d{1,2}号'
     self.day_of_month_regex = RegExpUtility.get_safe_reg_exp(
         day_of_month_pattern, regex.I)
 def __init__(self):
     """Initialise extractors, regexes and lookup maps for date periods.

     Patterns come from ``ChineseDateTime`` and go through
     ``RegExpUtility.get_safe_reg_exp``.
     """
     compile_re = RegExpUtility.get_safe_reg_exp

     # Helper extractors / parser.
     self._date_extractor = ChineseDateExtractor()
     self._date_parser = ChineseDateParser()
     self._duration_extractor = ChineseDurationExtractor()

     # Period patterns.
     self._simple_cases_regex = compile_re(ChineseDateTime.SimpleCasesRegex)
     self._one_word_period_regex = compile_re(
         ChineseDateTime.OneWordPeriodRegex)
     self._year_regex = compile_re(ChineseDateTime.DatePeriodYearRegex)
     self._past_regex = compile_re(ChineseDateTime.PastRegex)
     self._future_regex = compile_re(ChineseDateTime.FutureRegex)
     self._week_of_month_regex = compile_re(
         ChineseDateTime.WeekOfMonthRegex)
     self._quarter_regex = compile_re(ChineseDateTime.QuarterRegex)
     self._season_regex = compile_re(ChineseDateTime.SeasonRegex)

     # next / last / this prefixes.
     self._next_prefix_regex = compile_re(
         ChineseDateTime.DatePeriodNextRegex)
     self._past_prefix_regex = compile_re(
         ChineseDateTime.DatePeriodLastRegex)
     self._this_prefix_regex = compile_re(
         ChineseDateTime.DatePeriodThisRegex)

     # Both of these are built from the literal pattern r'\0'.
     self._later_early_period_regex = compile_re(r'\0')
     self._week_with_week_day_range_regex = compile_re(r'\0')

     self._token_before_date = ' on '

     # Lookup tables.
     self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
     self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
     self._cardinal_map = ChineseDateTime.ParserConfigurationCardinalMap
     self._season_map = ChineseDateTime.ParserConfigurationSeasonMap
     self._unit_map = ChineseDateTime.ParserConfigurationUnitMap
    def __init__(self):
        """Initialise regexes, extractors and number helpers for date periods.

        Patterns come from ``ChineseDateTime`` (one from ``BaseDateTime``) and
        go through ``RegExpUtility.get_safe_reg_exp``. The trailing attributes
        are placeholders set to ``None``.
        """
        compile_re = RegExpUtility.get_safe_reg_exp

        # Season / month / year building blocks.
        self._season_regex = compile_re(ChineseDateTime.SeasonRegex)
        self._month_suffix_regex = compile_re(ChineseDateTime.MonthSuffixRegex)
        self._year_regex_in_number = compile_re(
            ChineseDateTime.YearRegexInNumber)
        self._strict_year_regex = compile_re(ChineseDateTime.StrictYearRegex)

        # last / next / this period prefixes.
        self._last_regex = compile_re(ChineseDateTime.DatePeriodLastRegex)
        self._next_regex = compile_re(ChineseDateTime.DatePeriodNextRegex)
        self._this_regex = compile_re(ChineseDateTime.DatePeriodThisRegex)

        self._month_regex = compile_re(ChineseDateTime.MonthRegex)
        self._zero_to_nine_integer_regex_chinese = compile_re(
            ChineseDateTime.ZeroToNineIntegerRegexCJK)
        self._relative_month_regex = compile_re(
            ChineseDateTime.RelativeMonthRegex)
        self._day_regex_in_chinese = compile_re(
            ChineseDateTime.DatePeriodDayRegexInCJK)
        self._day_regex = compile_re(ChineseDateTime.DayRegex)

        # Simple-case patterns; list order is preserved exactly.
        self._simple_cases_regexes = [
            compile_re(pattern) for pattern in (
                ChineseDateTime.SimpleCasesRegex,
                ChineseDateTime.OneWordPeriodRegex,
                ChineseDateTime.StrictYearRegex,
                ChineseDateTime.YearToYear,
                ChineseDateTime.YearToYearSuffixRequired,
                ChineseDateTime.MonthToMonth,
                ChineseDateTime.MonthToMonthSuffixRequired,
                ChineseDateTime.YearAndMonth,
                ChineseDateTime.PureNumYearAndMonth,
                ChineseDateTime.DatePeriodYearInCJKRegex,
                ChineseDateTime.WeekOfMonthRegex,
                ChineseDateTime.SeasonWithYear,
                ChineseDateTime.QuarterRegex,
                ChineseDateTime.DecadeRegex,
            )
        ]

        self._illegal_year_regex = compile_re(BaseDateTime.IllegalYearRegex)
        self._year_regex = compile_re(ChineseDateTime.YearRegex)
        self._till_regex = compile_re(ChineseDateTime.DatePeriodTillRegex)
        self._followed_unit = compile_re(ChineseDateTime.FollowedUnit)
        self._number_combined_with_unit = compile_re(
            ChineseDateTime.NumberCombinedWithUnit)
        self._past_regex = compile_re(ChineseDateTime.PastRegex)
        self._future_regex = compile_re(ChineseDateTime.FutureRegex)

        # Helper extractors and the number parser.
        self._date_point_extractor = ChineseDateExtractor()
        self._integer_extractor = ChineseNumberExtractor()
        self._number_parser = BaseNumberParser(
            ChineseNumberParserConfiguration())

        self._now_regex = compile_re(ChineseDateTime.NowRegex)
        self._month_num_regex = compile_re(ChineseDateTime.MonthNumRegex)
        self._cardinal_extractor = ChineseCardinalExtractor()
        self._ordinal_extractor = ChineseOrdinalExtractor()

        # TODO When the implementation for these properties is added, change the None values to their respective Regexps
        # (_previous_prefix_regex used to be assigned twice; once is enough.)
        self._previous_prefix_regex = None
        self._check_both_before_after = None
        self._century_suffix_regex = None
        self._year_period_regex = None
        self._duration_date_restrictions = None
        self._more_than_regex = None
        self._less_than_regex = None
        self._later_regex = None
        self._ago_regex = None
        self._future_suffix_regex = None
        self._within_next_prefix_regex = None
        self._time_unit_regex = None
 def __init__(self):
     """Initialise extractors, regexes and lookup maps for date periods.

     Patterns come from ``ChineseDateTime`` and go through
     ``RegExpUtility.get_safe_reg_exp``; several attributes are
     placeholders set to ``None``.
     """
     compile_re = RegExpUtility.get_safe_reg_exp

     self._complex_dateperiod_regex = None
     self._relative_decade_regex = None
     self._relative_regex = compile_re(ChineseDateTime.RelativeRegex)

     # Helper extractors / parser.
     self._date_extractor = ChineseDateExtractor()
     self._date_parser = ChineseDateParser()
     self._duration_extractor = ChineseDurationExtractor()

     # Period patterns.
     self._simple_cases_regex = compile_re(ChineseDateTime.SimpleCasesRegex)
     self._one_word_period_regex = compile_re(
         ChineseDateTime.OneWordPeriodRegex)
     self._year_regex = compile_re(ChineseDateTime.YearRegex)
     self._past_regex = compile_re(ChineseDateTime.PastRegex)
     self._future_regex = compile_re(ChineseDateTime.FutureRegex)
     self._week_of_month_regex = compile_re(
         ChineseDateTime.WeekOfMonthRegex)
     self._quarter_regex = compile_re(ChineseDateTime.QuarterRegex)
     self._season_regex = compile_re(ChineseDateTime.SeasonRegex)

     # next / last / this prefixes.
     self._next_prefix_regex = compile_re(
         ChineseDateTime.DatePeriodNextRegex)
     self._past_prefix_regex = compile_re(
         ChineseDateTime.DatePeriodLastRegex)
     self._this_prefix_regex = compile_re(
         ChineseDateTime.DatePeriodThisRegex)

     # Both of these are built from the literal pattern r'\0'.
     self._later_early_period_regex = compile_re(r'\0')
     self._week_with_week_day_range_regex = compile_re(r'\0')

     self._token_before_date = ' on '

     # Lookup tables.
     self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
     self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
     self._cardinal_map = ChineseDateTime.ParserConfigurationCardinalMap
     self._season_map = ChineseDateTime.ParserConfigurationSeasonMap
     self._unit_map = ChineseDateTime.ParserConfigurationUnitMap

     self._now_regex = compile_re(ChineseDateTime.NowRegex)

     # TODO When the implementation for these properties is added, change the None values to their respective Regexps
     self._reference_date_period_regex = compile_re(r'\0')
     self._decade_with_century_regex = None
     self._later_regex = None
     self._ago_regex = None
Ejemplo n.º 23
0
 def __init__(self):
     """Initialise the base class, number helpers and period regexes.

     The base class receives a ``ChineseDatePeriodParserConfiguration``;
     patterns come from ``ChineseDateTime`` and go through
     ``RegExpUtility.get_safe_reg_exp``.
     """
     super().__init__(ChineseDatePeriodParserConfiguration())
     compile_re = RegExpUtility.get_safe_reg_exp

     # Number extraction / parsing helpers.
     self.integer_extractor = ChineseIntegerExtractor()
     self.number_parser = CJKNumberParser(
         ChineseNumberParserConfiguration())

     self.year_in_chinese_regex = compile_re(
         ChineseDateTime.DatePeriodYearInChineseRegex)
     self.number_combined_with_unit_regex = compile_re(
         ChineseDateTime.NumberCombinedWithUnit)
     self.unit_regex = compile_re(ChineseDateTime.UnitRegex)

     # Year / month combinations and ranges.
     self.year_and_month_regex = compile_re(ChineseDateTime.YearAndMonth)
     self.pure_number_year_and_month_regex = compile_re(
         ChineseDateTime.PureNumYearAndMonth)
     self.year_to_year_regex = compile_re(ChineseDateTime.YearToYear)
     self.year_to_year_suffix_required = compile_re(
         ChineseDateTime.YearToYearSuffixRequired)
     # Built from the same pattern as year_in_chinese_regex above.
     self.chinese_year_regex = compile_re(
         ChineseDateTime.DatePeriodYearInChineseRegex)
     self.season_with_year_regex = compile_re(
         ChineseDateTime.SeasonWithYear)
     self.decade_regex = compile_re(ChineseDateTime.DecadeRegex)

     # this / last / next period markers.
     self.date_this_regex = compile_re(ChineseDateTime.DatePeriodThisRegex)
     self.date_last_regex = compile_re(ChineseDateTime.DatePeriodLastRegex)
     self.date_next_regex = compile_re(ChineseDateTime.DatePeriodNextRegex)
Ejemplo n.º 24
0
class ChineseDateTimeExtractor(BaseDateTimeExtractor):
    """Extractor that finds combined date-time spans in Chinese text.

    ``extract`` gathers candidate tokens from four strategies (date + time
    merging, basic regex matching, "time of today" phrases, and durations
    followed by a before/after marker) and merges them into results.
    """

    before_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.BeforeRegex)
    after_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.AfterRegex)
    date_time_period_unit_regex = RegExpUtility.get_safe_reg_exp(
        ChineseDateTime.DateTimePeriodUnitRegex)

    def __init__(self):
        """Configure the base extractor and create the duration extractor."""
        super().__init__(ChineseDateTimeExtractorConfiguration())
        self.duration_extractor = ChineseDurationExtractor()

    def extract(self,
                source: str,
                reference: datetime = None) -> List[ExtractResult]:
        """Return the date-time extraction results found in ``source``.

        Args:
            source: Text to scan.
            reference: Reference date; defaults to ``datetime.now()``.

        Returns:
            The merged extraction results.
        """
        if reference is None:
            reference = datetime.now()

        tokens: List[Token] = []
        tokens.extend(self.merge_date_and_time(source, reference))
        tokens.extend(self.basic_regex_match(source))
        tokens.extend(self.time_of_today(source, reference))
        tokens.extend(self.duration_with_ago_and_later(source, reference))

        return merge_all_tokens(tokens, source, self.extractor_type_name)

    def merge_date_and_time(self, source: str,
                            reference: datetime) -> List[Token]:
        """Join a date result with the next time result across a connector.

        Args:
            source: Text to scan.
            reference: Reference date handed to the sub-extractors.

        Returns:
            One token per date/time pair whose in-between text is accepted by
            ``self.config.is_connector_token``.
        """
        tokens: List[Token] = []
        ers: List[ExtractResult] = self.config.date_point_extractor.extract(
            source, reference)

        # Without at least one date there is nothing to merge.
        if len(ers) < 1:
            return tokens

        ers.extend(self.config.time_point_extractor.extract(source, reference))

        if len(ers) < 2:
            return tokens

        ers = sorted(ers, key=lambda x: x.start)
        i = 0

        while i < len(ers) - 1:
            # Find the first later result that does not overlap ers[i].
            j = i + 1
            while j < len(ers) and ers[i].overlap(ers[j]):
                j += 1

            if j >= len(ers):
                break

            # Compare type names by value; ``is`` only tests object identity.
            if ers[i].type == Constants.SYS_DATETIME_DATE \
                    and ers[j].type == Constants.SYS_DATETIME_TIME:
                middle_begin = ers[i].start + ers[i].length
                middle_end = ers[j].start

                if middle_begin > middle_end:
                    # Move past this pair before skipping; a bare ``continue``
                    # would re-examine the same pair forever.
                    i = j + 1
                    continue

                middle = source[middle_begin:middle_end].strip().lower()

                if self.config.is_connector_token(middle):
                    begin = ers[i].start
                    end = ers[j].start + ers[j].length
                    tokens.append(Token(begin, end))
                i = j + 1
                continue
            i = j

        return tokens

    def time_of_today(self, source: str, reference: datetime) -> List[Token]:
        """Find time results directly preceded by a "time of today" phrase.

        Args:
            source: Text to scan.
            reference: Reference date handed to the time extractor.

        Returns:
            Tokens spanning from the start of the matched prefix phrase to the
            end of the time result.
        """
        tokens: List[Token] = []
        ers = self.config.time_point_extractor.extract(source, reference)

        for er in ers:
            before = source[:er.start]
            inner_match = regex.search(self.config.night_regex, er.text)

            # When the time text itself begins with a night_regex match, fold
            # that leading part into the prefix being examined.
            if inner_match is not None and inner_match.start() == 0:
                before = source[:er.start + len(inner_match.group())]

            if not before:
                continue

            match = regex.search(self.config.time_of_today_before_regex,
                                 before)
            # Only whitespace may sit between the prefix match and the time.
            if match is not None and not before[match.end():].strip():
                begin = match.start()
                end = er.start + er.length
                tokens.append(Token(begin, end))

        return tokens

    def duration_with_ago_and_later(self, source: str,
                                    reference: datetime) -> List[Token]:
        """Find durations immediately followed by a before/after character.

        Args:
            source: Text to scan.
            reference: Reference date handed to the duration extractor.

        Returns:
            Tokens covering the duration plus the single following character,
            each carrying metadata flagged ``is_duration_with_ago_and_later``.
        """
        ret: List[Token] = []
        duration_er = self.duration_extractor.extract(source, reference)

        for er in duration_er:
            pos = er.start + er.length
            if pos >= len(source):
                continue

            # Only the one character right after the duration is inspected.
            suffix = source[pos]
            before_match = RegExpUtility.get_matches(self.before_regex, suffix)
            after_match = RegExpUtility.get_matches(self.after_regex, suffix)

            if (before_match and suffix.startswith(before_match[0])) \
                    or (after_match and suffix.startswith(after_match[0])):
                meta_data = MetaData()
                meta_data.is_duration_with_ago_and_later = True
                ret.append(Token(er.start, pos + 1, meta_data))
        return ret
Ejemplo n.º 25
0
    def _parse_decade(self, source: str,
                      reference: datetime) -> DateTimeResolutionResult:
        """Resolve a decade expression into a ten-year range.

        Args:
            source: Text that must be matched in full by ``self.decade_regex``.
            reference: Date supplying the default century and used to choose
                between past and future resolutions.

        Returns:
            A result whose ``timex`` has the form ``(begin,end,P10Y)`` and whose
            ``future_value`` / ``past_value`` are ``[start, end]`` date pairs;
            the freshly constructed result if ``source`` does not fully match.
        """
        result = DateTimeResolutionResult()

        # Default to the reference date's century (floor division keeps this
        # in integer arithmetic).
        century = reference.year // 100 + 1
        decade_last_year = 10
        input_century = False

        match = regex.search(self.decade_regex, source)

        if not match or len(match.group()) != len(source):
            return result

        decade_str = RegExpUtility.get_group(match, Constants.DECADE)
        decade = self.__convert_chinese_to_number(decade_str)
        century_str = RegExpUtility.get_group(match, Constants.CENTURY)
        if century_str != "":
            # Explicit century in the text.
            century = self.__convert_chinese_to_number(century_str)
            input_century = True
        else:
            # Relative century: next/last shift it, anything else keeps the
            # reference century.
            century_str = RegExpUtility.get_group(match, Constants.REL_CENTURY)
            if century_str != "":
                century_str = century_str.strip().lower()

                if regex.search(self.date_next_regex, century_str):
                    century += 1
                elif regex.search(self.date_last_regex, century_str):
                    century -= 1

                input_century = True

        begin_year = ((century - 1) * 100) + decade
        end_year = begin_year + decade_last_year

        if input_century:
            begin_luis_str = DateTimeFormatUtil.luis_date(begin_year, 1, 1)
            end_luis_str = DateTimeFormatUtil.luis_date(end_year, 1, 1)
        else:
            # Century unknown: keep "XX" for it and fill in only the last two
            # year digits.
            begin_year_str = "XX{:02d}".format(decade)
            begin_luis_str = DateTimeFormatUtil.luis_date(-1, 1, 1)
            begin_luis_str = begin_luis_str.replace("XXXX", begin_year_str)

            end_year_str = "XX{:02d}".format(end_year % 100)
            end_luis_str = DateTimeFormatUtil.luis_date(-1, 1, 1)
            end_luis_str = end_luis_str.replace("XXXX", end_year_str)

        result.timex = f"({begin_luis_str},{end_luis_str},P10Y)"

        future_year, past_year = begin_year, begin_year
        start_date = DateUtils.safe_create_from_min_value(begin_year, 1, 1)
        # With no century given, move a whole century so the future value is
        # not before the reference and the past value is before it.
        if not input_century:
            if start_date < reference:
                future_year += 100
            else:
                past_year -= 100

        result.future_value = [
            DateUtils.safe_create_from_min_value(future_year, 1, 1),
            DateUtils.safe_create_from_min_value(
                future_year + decade_last_year, 1, 1)
        ]
        result.past_value = [
            DateUtils.safe_create_from_min_value(past_year, 1, 1),
            DateUtils.safe_create_from_min_value(past_year + decade_last_year,
                                                 1, 1)
        ]
        result.success = True

        return result
Ejemplo n.º 26
0
 def __init__(self):
     """Compile the holiday patterns into ``_holiday_regexes``.

     The resulting list keeps the patterns in a fixed order:
     ``HolidayRegexList1``, ``HolidayRegexList2``, ``LunarHolidayRegex``.
     """
     holiday_patterns = (
         ChineseDateTime.HolidayRegexList1,
         ChineseDateTime.HolidayRegexList2,
         ChineseDateTime.LunarHolidayRegex,
     )
     self._holiday_regexes = [
         RegExpUtility.get_safe_reg_exp(pattern)
         for pattern in holiday_patterns
     ]
Ejemplo n.º 27
0
    def parser_duration_with_ago_and_later(
            self, source: str,
            reference: datetime) -> DateTimeResolutionResult:
        """Resolve a duration followed by a before/after marker to a date.

        The last duration extracted from ``source`` is combined with the text
        that immediately follows it. When that suffix starts with a match of
        ``ChineseDateExtractor.before_regex`` the duration is subtracted from
        ``reference``; when it starts with a match of
        ``ChineseDateExtractor.after_regex`` the duration is added.

        Args:
            source: Text to parse.
            reference: Date the offset is applied to.

        Returns:
            A ``DateTimeResolutionResult``. On success ``success`` is True and
            ``timex``, ``future_value`` and ``past_value`` describe the
            shifted date. An untouched result is returned when no duration,
            unit match, known unit or direction marker is found, when the
            unit is not day/week/month/year, or when the shifted date cannot
            be represented.
        """
        import calendar  # local import: only needed to look up month lengths

        def shift_months(moment: datetime, months: int) -> datetime:
            # Work on a single running month index so that crossing a year
            # boundary (in either direction) falls out of one divmod.
            year, month_index = divmod(
                moment.year * 12 + (moment.month - 1) + months, 12)
            month = month_index + 1
            # Clamp the day so e.g. 31 March minus one month lands on the
            # last day of February instead of raising.
            last_day = calendar.monthrange(year, month)[1]
            return moment.replace(
                year=year, month=month, day=min(moment.day, last_day))

        result = DateTimeResolutionResult()

        # Extract once; the original ran the extractor twice on the same input.
        durations = self.duration_extractor.extract(source, reference)
        if not durations:
            return result
        duration_res = durations[-1]

        match = self.config._unit_regex.search(source)
        if not match:
            return result

        src_unit = RegExpUtility.get_group(match, 'unit')
        if src_unit not in self.config.unit_map:
            return result
        unit_str = self.config.unit_map.get(src_unit)

        # Direction of the shift is decided by what directly follows the
        # duration: -1 for a "before" marker, +1 for an "after" marker.
        suffix = source[duration_res.start + duration_res.length:]
        before_match = RegExpUtility.get_matches(
            ChineseDateExtractor.before_regex, suffix)
        if before_match and suffix.startswith(before_match[0]):
            sign = -1
        else:
            after_match = RegExpUtility.get_matches(
                ChineseDateExtractor.after_regex, suffix)
            if after_match and suffix.startswith(after_match[0]):
                sign = 1
            else:
                return result

        # The number is the text between the start of the duration and the
        # start of the unit. ``match.start()`` is a string offset; the
        # previously used ``match.lastindex`` is a capture-group index.
        number_str = source[duration_res.start:match.start()]
        number = self.parse_chinese_written_number_to_value(number_str)

        try:
            if unit_str == Constants.TIMEX_DAY:
                date = reference + timedelta(days=sign * number)
            elif unit_str == Constants.TIMEX_WEEK:
                date = reference + timedelta(days=sign * 7 * number)
            elif unit_str == Constants.TIMEX_MONTH_FULL:
                date = shift_months(reference, sign * int(number))
            elif unit_str == Constants.TIMEX_YEAR:
                # A year shift is a 12-month shift, which also handles
                # 29 February in a non-leap target year.
                date = shift_months(reference, sign * 12 * int(number))
            else:
                return result
        except (ValueError, OverflowError):
            # The shifted date falls outside the range datetime can hold.
            return result

        result.timex = DateTimeFormatUtil.luis_date_from_datetime(date)
        result.future_value = result.past_value = date
        result.success = True
        return result
Ejemplo n.º 28
0
    def __init__(self):
        """Compile the explicit and implicit date patterns.

        ``_date_regex_list`` holds ``DateRegexList1`` to ``DateRegexList8`` in
        numeric order, except that lists 6 and 7 are swapped (7 before 6) when
        ``ChineseDateTime.DefaultLanguageFallback`` equals
        ``Constants.DEFAULT_LANGUAGE_FALLBACK_DMY``.
        ``_implicit_date_list`` holds the remaining patterns in a fixed order.
        """
        compile_pattern = RegExpUtility.get_safe_reg_exp

        # Only the relative order of lists 6 and 7 depends on the fallback.
        if ChineseDateTime.DefaultLanguageFallback == Constants.DEFAULT_LANGUAGE_FALLBACK_DMY:
            fallback_ordered = (ChineseDateTime.DateRegexList7,
                                ChineseDateTime.DateRegexList6)
        else:
            fallback_ordered = (ChineseDateTime.DateRegexList6,
                                ChineseDateTime.DateRegexList7)

        date_patterns = (
            ChineseDateTime.DateRegexList1,
            ChineseDateTime.DateRegexList2,
            ChineseDateTime.DateRegexList3,
            ChineseDateTime.DateRegexList4,
            ChineseDateTime.DateRegexList5,
            *fallback_ordered,
            ChineseDateTime.DateRegexList8,
        )
        self._date_regex_list = [
            compile_pattern(pattern) for pattern in date_patterns
        ]

        implicit_patterns = (
            ChineseDateTime.LunarRegex,
            ChineseDateTime.SpecialDayRegex,
            ChineseDateTime.DateThisRegex,
            ChineseDateTime.DateLastRegex,
            ChineseDateTime.DateNextRegex,
            ChineseDateTime.WeekDayRegex,
            ChineseDateTime.WeekDayOfMonthRegex,
            ChineseDateTime.SpecialDate,
        )
        self._implicit_date_list = [
            compile_pattern(pattern) for pattern in implicit_patterns
        ]
Ejemplo n.º 29
0
 def __init__(self):
     """Initialise the base parser and the Chinese-specific lookups.

     The base class receives a fresh ``ChineseTimePeriodParserConfiguration``;
     two compiled regexes and two tables from ``ChineseDateTime`` are then
     stored on the instance.
     """
     super().__init__(ChineseTimePeriodParserConfiguration())

     to_regex = RegExpUtility.get_safe_reg_exp
     self.day_description_regex = to_regex(ChineseDateTime.TimeDayDescRegex)
     self.only_digit_match = to_regex(r'\d+')

     # Plain references to the shared tables; nothing is copied.
     self.low_bound_map = ChineseDateTime.TimeLowBoundDesc
     self.numbers_map = ChineseDateTime.TimeNumberDictionary
Ejemplo n.º 30
0
 def __init__(self):
     """Pass the base class a mapping of compiled time regex to ``TimeType``.

     Insertion order is ``TimeRegexes1`` (``ChineseTime``), ``TimeRegexes2``
     (``DigitTime``), ``TimeRegexes3`` (``LessTime``).
     """
     compile_regex = RegExpUtility.get_safe_reg_exp
     time_type_by_regex = {
         compile_regex(ChineseDateTime.TimeRegexes1): TimeType.ChineseTime,
         compile_regex(ChineseDateTime.TimeRegexes2): TimeType.DigitTime,
         compile_regex(ChineseDateTime.TimeRegexes3): TimeType.LessTime,
     }
     super().__init__(time_type_by_regex)