def _parse_duration(self, source: str, reference: datetime) -> DateTimeResolutionResult:
    """Resolve a "number + unit" duration found in ``source``.

    Covers inputs such as "前两年" (the previous two years) and
    "后三年" (the next three years).

    Args:
        source: Text to parse.
        reference: Date/time passed on to the duration extractor and to the
            final resolution step.

    Returns:
        The result of ``__parse_common_duration_with_unit``; an empty
        ``DateTimeResolutionResult`` when no duration is extracted, no unit
        is matched, or the unit is not in ``self.config.unit_map``.
    """
    empty_result = DateTimeResolutionResult()

    # Only the first duration reported by the extractor is considered.
    extracted = self.config.duration_extractor.extract(source, reference)
    duration = next(iter(extracted), None)
    if not duration:
        return empty_result

    unit_match = regex.search(self.unit_regex, duration.text)
    if not unit_match:
        return empty_result

    unit = RegExpUtility.get_group(unit_match, 'unit').strip().lower()
    if unit not in self.config.unit_map:
        return empty_result

    # Text before the duration, and the number text that precedes the unit.
    prefix = source[:duration.start].strip().lower()
    number_text = duration.text[:unit_match.start()].strip().lower()
    number_text = str(self.__convert_chinese_to_number(number_text))

    return self.__parse_common_duration_with_unit(
        prefix, unit, number_text, reference)
def __init__(self):
    """Create the sub-extractors and build the regexes held by this object."""
    to_regex = RegExpUtility.get_safe_reg_exp

    # Extractors for cardinal numbers and for single date / time /
    # date-time expressions.
    self._cardinal_extractor = ChineseCardinalExtractor()
    self._single_date_extractor = ChineseDateExtractor()
    self._single_time_extractor = ChineseTimeExtractor()
    self._single_date_time_extractor = ChineseDateTimeExtractor()

    # Regexes built from the ChineseDateTime pattern definitions.
    self._preposition_regex = to_regex(
        ChineseDateTime.DateTimePeriodPrepositionRegex)
    self._till_regex = to_regex(ChineseDateTime.DateTimePeriodTillRegex)
    self._specific_time_of_day_regex = to_regex(
        ChineseDateTime.SpecificTimeOfDayRegex)
    self._time_of_day_regex = to_regex(ChineseDateTime.TimeOfDayRegex)
    self._followed_unit = to_regex(
        ChineseDateTime.DateTimePeriodFollowedUnit)
    self._time_unit_regex = to_regex(
        ChineseDateTime.DateTimePeriodUnitRegex)
def __init__(self):
    """Initialise the base class with the Chinese date-time-period parser
    configuration, then build the regexes and number helpers used here."""
    super().__init__(ChineseDateTimePeriodParserConfiguration())
    to_regex = RegExpUtility.get_safe_reg_exp

    # Time-of-day regexes (MO / AF / EV / NI pattern definitions).
    self.tmo_regex = to_regex(ChineseDateTime.DateTimePeriodMORegex)
    self.taf_regex = to_regex(ChineseDateTime.DateTimePeriodAFRegex)
    self.tev_regex = to_regex(ChineseDateTime.DateTimePeriodEVRegex)
    self.tni_regex = to_regex(ChineseDateTime.DateTimePeriodNIRegex)

    self.unit_regex = to_regex(ChineseDateTime.DateTimePeriodUnitRegex)
    self.time_of_day_regex = to_regex(ChineseDateTime.TimeOfDayRegex)

    # Number extraction / parsing helpers.
    self.cardinal_extractor = ChineseCardinalExtractor()
    number_config = ChineseNumberParserConfiguration()
    self.cardinal_parser = CJKNumberParser(number_config)
def __init__(self):
    """Build the regexes and sub-extractors held by this object.

    Members whose implementation is still pending are set to ``None``.
    """
    to_regex = RegExpUtility.get_safe_reg_exp

    # Modifier regexes (equal / since / until / after / before).
    self._equal_regex = to_regex(BaseDateTime.EqualRegex)
    self._since_suffix_regex = to_regex(
        ChineseDateTime.ParserConfigurationSinceSuffix)
    self._since_prefix_regex = to_regex(
        ChineseDateTime.ParserConfigurationSincePrefix)
    self._until_regex = to_regex(ChineseDateTime.ParserConfigurationUntil)
    self._after_regex = to_regex(ChineseDateTime.AfterRegex)
    self._before_regex = to_regex(ChineseDateTime.BeforeRegex)
    self._ambiguity_filters_dict = ChineseDateTime.AmbiguityFiltersDict

    # One extractor per date/time entity kind.
    self._date_extractor = ChineseDateExtractor()
    self._time_extractor = ChineseTimeExtractor()
    self._date_time_extractor = ChineseDateTimeExtractor()
    self._date_period_extractor = ChineseDatePeriodExtractor()
    self._time_period_extractor = ChineseTimePeriodExtractor()
    self._date_time_period_extractor = ChineseDateTimePeriodExtractor()
    holiday_config = ChineseHolidayExtractorConfiguration()
    self._holiday_extractor = BaseHolidayExtractor(holiday_config)
    self._duration_extractor = ChineseDurationExtractor()
    self._set_extractor = ChineseSetExtractor()

    # TODO When the implementation for these properties is added, change
    # the None values to their respective Regexps
    self._superfluous_word_matcher = None
    self._fail_fast_regex = None
    self._unspecified_date_period_regex = None
    self._suffix_after_regex = None
    self._potential_ambiguous_range_regex = None
    self._ambiguous_range_modifier_prefix = None
    self._around_regex = None
    self._term_filter_regexes = None
    self._datetime_alt_extractor = None
    self._time_zone_extractor = None
def __init__(self):
    """Initialise the base class, then build the regexes and point
    extractors held by this object.

    Members whose implementation is still pending are set to ``None``.
    """
    super().__init__()
    to_regex = RegExpUtility.get_safe_reg_exp

    self._datetime_period_unit_regex = to_regex(
        ChineseDateTime.DateTimePeriodUnitRegex)
    self._after_regex = to_regex(ChineseDateTime.AfterRegex)
    self._before_regex = to_regex(ChineseDateTime.BeforeRegex)

    # Extractors for the date part and the time part.
    self._date_point_extractor = ChineseDateExtractor()
    self._time_point_extractor = ChineseTimeExtractor()

    self._now_regex = to_regex(ChineseDateTime.NowRegex)
    self._night_regex = to_regex(ChineseDateTime.NightRegex)
    self._time_of_today_before_regex = to_regex(
        ChineseDateTime.TimeOfTodayRegex)
    self._preposition_regex = to_regex(ChineseDateTime.PrepositionRegex)

    # TODO When the implementation for these properties is added, change
    # the None values to their respective Regexps
    self._year_regex = None
    self._year_suffix = None
    self._suffix_after_regex = None
    self._date_number_connector_regex = None
    self._number_as_time_regex = None
def __init__(self):
    """Build the regexes, extractors and number parser held by this object."""
    to_regex = RegExpUtility.get_safe_reg_exp

    # Patterns for the "simple case" date periods, kept in this order.
    simple_case_patterns = (
        ChineseDateTime.SimpleCasesRegex,
        ChineseDateTime.OneWordPeriodRegex,
        ChineseDateTime.StrictYearRegex,
        ChineseDateTime.YearToYear,
        ChineseDateTime.YearToYearSuffixRequired,
        ChineseDateTime.YearAndMonth,
        ChineseDateTime.PureNumYearAndMonth,
        ChineseDateTime.DatePeriodYearInChineseRegex,
        ChineseDateTime.WeekOfMonthRegex,
        ChineseDateTime.SeasonWithYear,
        ChineseDateTime.QuarterRegex,
    )
    self._simple_cases_regexes = [
        to_regex(pattern) for pattern in simple_case_patterns
    ]

    self._illegal_year_regex = to_regex(BaseDateTime.IllegalYearRegex)
    self._year_regex = to_regex(ChineseDateTime.YearRegex)
    self._till_regex = to_regex(ChineseDateTime.DatePeriodTillRegex)
    self._followed_unit = to_regex(ChineseDateTime.FollowedUnit)
    self._number_combined_with_unit = to_regex(
        ChineseDateTime.NumberCombinedWithUnit)
    self._past_regex = to_regex(ChineseDateTime.PastRegex)
    self._future_regex = to_regex(ChineseDateTime.FutureRegex)

    # Date point extraction and number extraction / parsing helpers.
    self._date_point_extractor = ChineseDateExtractor()
    self._integer_extractor = ChineseNumberExtractor()
    number_config = ChineseNumberParserConfiguration()
    self._number_parser = BaseNumberParser(number_config)
def __init__(self):
    """Build the regex for text that ends in "time" or "timezone"."""
    end_pattern = "time$|timezone$"
    self._time_zone_end_regex = RegExpUtility.get_safe_reg_exp(end_pattern)
def parse_implicit_date(self, source: str, reference: datetime) -> DateTimeResolutionResult:
    """Resolve an implicit date expression relative to ``reference``.

    The forms below are tried in order; the first one that matches decides
    the result:

    1. A day of month, optionally with relative month / year words
       (``self.special_date_regex``).
    2. A special-day word (``self.config.special_day_regex``).
    3. "this" / "next" / "last" followed by a weekday.
    4. A bare weekday.

    Args:
        source: Text to parse; surrounding whitespace is ignored.
        reference: Date/time the expression is resolved against.

    Returns:
        A ``DateTimeResolutionResult`` with ``timex``, ``future_value``,
        ``past_value`` and ``success`` set when a form matched; otherwise an
        unpopulated result.
    """
    trimmed_source = source.strip()
    result = DateTimeResolutionResult()

    # handle "十二日" (the 12th), "明年这个月三日" (the 3rd of this month next
    # year), "本月十一日" (the 11th of this month)
    match = regex.match(self.special_date_regex, trimmed_source)
    if match:
        year_str = RegExpUtility.get_group(match, 'thisyear')
        month_str = RegExpUtility.get_group(match, 'thismonth')
        day_str = RegExpUtility.get_group(match, 'day')

        month = reference.month
        year = reference.year
        day = self.config.day_of_month[day_str]

        has_month = bool(month_str)
        has_year = bool(year_str)

        if has_month:
            # Shift to the next / previous month, rolling the year over at
            # the calendar boundaries.
            if regex.search(self.token_next_regex, month_str):
                month += 1
                if month == Constants.MAX_MONTH + 1:
                    month = Constants.MIN_MONTH
                    year += 1
            elif regex.search(self.token_last_regex, month_str):
                month -= 1
                if month == Constants.MIN_MONTH - 1:
                    month = Constants.MAX_MONTH
                    year -= 1

        if has_year:
            if regex.search(self.token_next_regex, year_str):
                year += 1
            elif regex.search(self.token_last_regex, year_str):
                year -= 1

        # Components that were not stated are written as -1 in the timex.
        result.timex = DateTimeFormatUtil.luis_date(
            year if has_year else -1, month if has_month else -1, day)

        if day > self.get_month_max_day(year, month):
            # The day does not exist in the resolved month, so look at the
            # neighbouring months instead.
            future_month = month + 1
            past_month = month - 1
            future_year = year
            past_year = year

            if future_month == Constants.MAX_MONTH + 1:
                future_month = Constants.MIN_MONTH
                future_year = year + 1

            if past_month == Constants.MIN_MONTH - 1:
                past_month = Constants.MAX_MONTH
                past_year = year - 1

            is_future_valid = DateUtils.is_valid_date(
                future_year, future_month, day)
            is_past_valid = DateUtils.is_valid_date(
                past_year, past_month, day)

            if is_future_valid and is_past_valid:
                future_date = DateUtils.safe_create_from_min_value(
                    future_year, future_month, day)
                past_date = DateUtils.safe_create_from_min_value(
                    past_year, past_month, day)
            elif is_future_valid:
                # Only the following month has this day.
                future_date = past_date = DateUtils.safe_create_from_min_value(
                    future_year, future_month, day)
            elif is_past_valid:
                # Only the preceding month has this day, so use it for both
                # values rather than the invalid current-month date.
                future_date = past_date = DateUtils.safe_create_from_min_value(
                    past_year, past_month, day)
            else:
                # Neither neighbour is valid: fall back to the original
                # year/month/day.
                future_date = past_date = DateUtils.safe_create_from_min_value(
                    year, month, day)
        else:
            future_date = DateUtils.safe_create_from_min_value(
                year, month, day)
            past_date = DateUtils.safe_create_from_min_value(
                year, month, day)

            if not has_month:
                # Only the day was given: move by one month so that the
                # future value is not before the reference and the past
                # value is before it.
                if future_date < reference:
                    if self.is_valid_date(year, month + 1, day):
                        future_date += datedelta(months=1)
                if past_date >= reference:
                    if self.is_valid_date(year, month - 1, day):
                        past_date += datedelta(months=-1)
                    elif DateUtils.is_Feb_29th(year, month - 1, day):
                        past_date += datedelta(months=-2)
            elif not has_year:
                # Month given but no year: move by one year instead.
                if future_date < reference:
                    if self.is_valid_date(year + 1, month, day):
                        future_date += datedelta(years=1)
                if past_date >= reference:
                    if self.is_valid_date(year - 1, month, day):
                        past_date += datedelta(years=-1)

        result.future_value = future_date
        result.past_value = past_date
        result.success = True
        return result

    # handle "today", "the day before yesterday"
    match = regex.match(self.config.special_day_regex, trimmed_source)
    if match and match.start() == 0 and len(
            match.group()) == len(trimmed_source):
        swift = self.config.get_swift_day(match.group())
        value = reference + timedelta(days=swift)

        result.timex = DateTimeFormatUtil.luis_date_from_datetime(value)
        result.future_value = result.past_value = DateUtils.safe_create_from_min_value(
            value.year, value.month, value.day)
        result.success = True
        return result

    # handle "this Friday", "next Sunday", "last Friday" / "last mon":
    # the three cases differ only in the regex and the DateUtils resolver.
    relative_weekday_cases = (
        (self.config.this_regex, DateUtils.this),
        (self.config.next_regex, DateUtils.next),
        (self.config.last_regex, DateUtils.last),
    )
    for pattern, resolve_weekday in relative_weekday_cases:
        match = regex.match(pattern, trimmed_source)
        if match and match.start() == 0 and len(
                match.group()) == len(trimmed_source):
            weekday_str = RegExpUtility.get_group(match, 'weekday')
            value = resolve_weekday(
                reference, self.config.day_of_week.get(weekday_str))

            result.timex = DateTimeFormatUtil.luis_date_from_datetime(value)
            result.future_value = value
            result.past_value = value
            result.success = True
            return result

    # handle "Friday"
    match = regex.match(self.config.week_day_regex, trimmed_source)
    if match and match.start() == 0 and len(
            match.group()) == len(trimmed_source):
        weekday_str = RegExpUtility.get_group(match, 'weekday')
        weekday = self.config.day_of_week.get(weekday_str)
        value = DateUtils.this(reference, weekday)

        # Weekday 0 is rewritten as 7 before the isoweekday() comparison
        # and before it is written into the timex.
        if weekday == 0:
            weekday = 7

        if weekday < reference.isoweekday():
            value = DateUtils.next(reference, weekday)

        result.timex = 'XXXX-WXX-' + str(weekday)
        future_date = value
        past_date = value

        if future_date < reference:
            future_date += timedelta(weeks=1)

        if past_date >= reference:
            past_date -= timedelta(weeks=1)

        result.future_value = future_date
        result.past_value = past_date
        result.success = True
        return result

    return result
def __init__(self):
    """Build the date regexes, lookup maps and number helpers of this object."""
    to_regex = RegExpUtility.get_safe_reg_exp

    date_patterns = [
        ChineseDateTime.DateRegexList1,
        ChineseDateTime.DateRegexList2,
        ChineseDateTime.DateRegexList3,
        ChineseDateTime.DateRegexList4,
        ChineseDateTime.DateRegexList5,
    ]
    # The relative order of patterns 6 and 7 depends on the language
    # fallback setting.
    if ChineseDateTime.DefaultLanguageFallback == Constants.DEFAULT_LANGUAGE_FALLBACK_DMY:
        date_patterns += [
            ChineseDateTime.DateRegexList7,
            ChineseDateTime.DateRegexList6,
        ]
    else:
        date_patterns += [
            ChineseDateTime.DateRegexList6,
            ChineseDateTime.DateRegexList7,
        ]
    date_patterns.append(ChineseDateTime.DateRegexList8)
    self._date_regex = [to_regex(pattern) for pattern in date_patterns]

    # Lookup tables taken from the ChineseDateTime definitions.
    self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
    self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
    self._day_of_week = ChineseDateTime.ParserConfigurationDayOfWeek

    self._special_day_regex = to_regex(ChineseDateTime.SpecialDayRegex)
    self._special_day_with_num_regex = to_regex(
        ChineseDateTime.SpecialDayWithNumRegex)
    self._this_regex = to_regex(ChineseDateTime.DateThisRegex)
    self._next_regex = to_regex(ChineseDateTime.DateNextRegex)
    self._last_regex = to_regex(ChineseDateTime.DateLastRegex)
    self._unit_regex = to_regex(ChineseDateTime.DateUnitRegex)
    self._unit_map = ChineseDateTime.ParserConfigurationUnitMap
    self._week_day_of_month_regex = to_regex(
        ChineseDateTime.WeekDayOfMonthRegex)
    self._week_day_regex = to_regex(ChineseDateTime.WeekDayRegex)

    # Number extraction / parsing helpers.
    self._integer_extractor = ChineseIntegerExtractor()
    number_config = ChineseNumberParserConfiguration()
    self._number_parser = CJKNumberParser(number_config)
def _parse_year_to_year(self, source: str, reference: datetime) -> DateTimeResolutionResult:
    """Resolve a "year to year" range found in ``source``.

    Args:
        source: Text to parse.
        reference: Not used by this method.

    Returns:
        A successful result whose values are ``[begin_date, end_date]``
        (1 January of each year) and whose timex is
        ``(begin,end,P<n>Y)``; an unpopulated result when neither
        year-to-year regex matches.
    """
    result = DateTimeResolutionResult()

    range_match = (regex.search(self.year_to_year_regex, source)
                   or regex.search(self.year_to_year_suffix_required, source))
    if not range_match:
        return result

    numeric_years = list(regex.finditer(self.config.year_regex, source))
    chinese_years = list(regex.finditer(self.chinese_year_regex, source))

    # Both years stay 0 when none of the combinations below applies.
    begin_year = end_year = 0

    if len(numeric_years) == 2:
        first, second = numeric_years
        begin_year = self.__convert_chinese_to_number(
            RegExpUtility.get_group(first, Constants.YEAR_GROUP_NAME))
        end_year = self.__convert_chinese_to_number(
            RegExpUtility.get_group(second, Constants.YEAR_GROUP_NAME))
    elif len(chinese_years) == 2:
        first, second = chinese_years
        begin_year = self._convert_year(
            RegExpUtility.get_group(first, Constants.YEAR_CHINESE), True)
        end_year = self._convert_year(
            RegExpUtility.get_group(second, Constants.YEAR_CHINESE), True)
    elif len(numeric_years) == 1 and len(chinese_years) == 1:
        numeric, chinese = numeric_years[0], chinese_years[0]
        numeric_text = RegExpUtility.get_group(
            numeric, Constants.YEAR_GROUP_NAME)
        chinese_text = RegExpUtility.get_group(
            chinese, Constants.YEAR_CHINESE)

        # Whichever year appears first in the text is the begin year.
        if numeric.start() < chinese.start():
            begin_text, end_text = numeric_text, chinese_text
        else:
            begin_text, end_text = chinese_text, numeric_text
        begin_year = self.__convert_chinese_to_number(begin_text)
        end_year = self.__convert_chinese_to_number(end_text)

    begin_year = self.__sanitize_year(begin_year)
    end_year = self.__sanitize_year(end_year)

    begin_date = DateUtils.safe_create_from_min_value(begin_year, 1, 1)
    end_date = DateUtils.safe_create_from_min_value(end_year, 1, 1)

    # Separate list objects for the future and past values.
    result.future_value = [begin_date, end_date]
    result.past_value = [begin_date, end_date]

    begin_timex = DateTimeFormatUtil.luis_date_from_datetime(begin_date)
    end_timex = DateTimeFormatUtil.luis_date_from_datetime(end_date)
    result.timex = f'({begin_timex},{end_timex},P{end_year - begin_year}Y)'

    result.success = True
    return result
def _parse_simple_cases(self, source: str, reference: datetime) -> DateTimeResolutionResult:
    """Resolve a day-to-day range within one month.

    The whole of ``source`` must match ``self.config.simple_cases_regex``.
    The month comes either from an explicit month group or from a relative
    month word applied to ``reference``; the year comes from an explicit
    year group or from ``reference``.

    Args:
        source: Text to parse.
        reference: Date/time the expression is resolved against.

    Returns:
        A successful result whose ``future_value`` / ``past_value`` are
        ``[begin, end]`` pairs and whose timex is ``(begin,end,P<n>D)``;
        an unpopulated result when ``source`` does not match completely.
    """
    result = DateTimeResolutionResult()
    year = reference.year
    month = reference.month
    no_year = False
    input_year = False

    match = regex.search(self.config.simple_cases_regex, source)
    if not match or match.start() != 0 or len(
            match.group()) != len(source):
        return result

    days = RegExpUtility.get_group_list(match, Constants.DAY_GROUP_NAME)
    begin_day = self.config.day_of_month[days[0]]
    end_day = self.config.day_of_month[days[1]]

    month_str = RegExpUtility.get_group(match, Constants.MONTH_GROUP_NAME)
    if month_str.strip() != '':
        month = self.config.month_of_year[month_str]
    else:
        month_str = RegExpUtility.get_group(match, Constants.REL_MONTH)
        month += self.config.get_swift_day_or_month(month_str)
        # ``reference.month`` is 1-based (1..12). Wrap the shifted month
        # back into that range and carry the overflow into the year, so
        # that December + 1 becomes January of the next year and
        # January - 1 becomes December of the previous year.
        year += (month - 1) // 12
        month = (month - 1) % 12 + 1

    year_str = RegExpUtility.get_group(match, Constants.YEAR_GROUP_NAME)
    if year_str.strip() != '':
        year = int(year_str)
        input_year = True
    else:
        no_year = True

    # The year is written into the timex only when it was stated or when
    # the month word is reported as "future" by the config; otherwise -1.
    timex_year = year if input_year or self.config.is_future(month_str) else -1
    begin_date_luis = DateTimeFormatUtil.luis_date(
        timex_year, month, begin_day)
    end_date_luis = DateTimeFormatUtil.luis_date(
        timex_year, month, end_day)

    # generate_dates results are indexed below as [0] = future, [1] = past.
    future_past_begin_date = DateUtils.generate_dates(
        no_year, reference, year, month, begin_day)
    future_past_end_date = DateUtils.generate_dates(
        no_year, reference, year, month, end_day)

    result.timex = f'({begin_date_luis},{end_date_luis},P{end_day - begin_day}D)'
    result.future_value = [
        future_past_begin_date[0], future_past_end_date[0]
    ]
    result.past_value = [
        future_past_begin_date[1], future_past_end_date[1]
    ]
    result.success = True
    return result
def parse_specific_time_of_day(
        self, source: str, reference: datetime) -> DateTimeResolutionResult:
    """Resolve a time-of-day period, optionally attached to a date.

    Two forms are handled:

    * Text that is exactly a specific time of day, e.g. "昨晚" (last
      night) or "今晨" (this morning); the range comes from
      ``self.config.get_matched_time_range``.
    * A date followed by a time-of-day word. The word is mapped to one of
      TMO (8-12), TMI (11-13), TAF (``Constants.HALF_DAY_HOUR_COUNT``-16),
      TEV (16-20) or TNI (20-23:59:59).

    Args:
        source: Text to parse.
        reference: Date/time the expression is resolved against.

    Returns:
        A successful result whose ``future_value`` / ``past_value`` hold the
        begin and end of the period, or an unpopulated result when nothing
        applies.
    """
    result = DateTimeResolutionResult()
    trimmed_source = source.strip()
    # Only the night range sets a non-zero end minute.
    end_min = 0

    # Handle 昨晚 (last night), 今晨 (this morning)
    if RegExpUtility.is_exact_match(self.config.specific_time_of_day_regex,
                                    trimmed_source, True):
        values = self.config.get_matched_time_range(source)
        if not values:
            return result

        date = reference.date() + timedelta(days=values.swift)
        day = date.day
        month = date.month
        year = date.year

        result.timex = DateTimeFormatUtil.format_date(date) + values.time_str
        # end_min is passed for both the minute and the second argument.
        result.future_value = result.past_value = [
            DateUtils.safe_create_from_min_value(
                year, month, day, values.begin_hour, 0, 0),
            DateUtils.safe_create_from_min_value(
                year, month, day,
                values.end_hour, values.end_min, values.end_min)
        ]
        result.success = True
        return result

    # handle morning, afternoon..
    if regex.search(self.tmo_regex, source):
        time_str = 'TMO'
        begin_hour = 8
        end_hour = 12
    elif regex.search(self.tmi_regex, source):
        time_str = 'TMI'
        begin_hour = 11
        end_hour = 13
    elif regex.search(self.taf_regex, source):
        time_str = 'TAF'
        begin_hour = Constants.HALF_DAY_HOUR_COUNT
        end_hour = 16
    elif regex.search(self.tev_regex, source):
        time_str = 'TEV'
        begin_hour = 16
        end_hour = 20
    elif regex.search(self.tni_regex, source):
        time_str = 'TNI'
        begin_hour = 20
        end_hour = 23
        end_min = 59
    else:
        return result

    # An exact specific-time-of-day match always returns in the block
    # above, so only "date + time of day" remains to be handled here.

    # handle Date followed by morning, afternoon
    match = regex.search(self.config.time_of_day_regex, trimmed_source)
    if match:
        before_str = trimmed_source[0:match.start()].strip()
        extracted_results = self.single_date_extractor.extract(
            before_str, reference)
        # The text before the time-of-day word must be one date that spans
        # it completely.
        if len(extracted_results) == 0 or extracted_results[0].length != len(before_str):
            return result

        parse_result = self.config.date_parser.parse(
            extracted_results[0], reference)
        future_date = parse_result.value.future_value
        past_date = parse_result.value.past_value

        result.timex = parse_result.timex_str + time_str
        result.future_value = (
            DateUtils.safe_create_from_min_value(
                future_date.year, future_date.month, future_date.day,
                begin_hour, 0, 0),
            DateUtils.safe_create_from_min_value(
                future_date.year, future_date.month, future_date.day,
                end_hour, end_min, end_min))
        result.past_value = (
            DateUtils.safe_create_from_min_value(
                past_date.year, past_date.month, past_date.day,
                begin_hour, 0, 0),
            DateUtils.safe_create_from_min_value(
                past_date.year, past_date.month, past_date.day,
                end_hour, end_min, end_min))
        result.success = True
        return result

    return result
def _parse_simple_cases(self, source: str, reference: datetime) -> DateTimeResolutionResult:
    """Resolve a day-to-day range within one month.

    The whole of ``source`` must match ``self.config.simple_cases_regex``.
    The month comes either from an explicit month group or from a relative
    month word applied to ``reference``; the year comes from an explicit
    year group or from ``reference``.

    Args:
        source: Text to parse.
        reference: Date/time the expression is resolved against.

    Returns:
        A successful result whose ``future_value`` / ``past_value`` are
        ``[begin, end]`` pairs and whose timex is ``(begin,end,P<n>D)``;
        an unpopulated result when ``source`` does not match completely.
    """
    result = DateTimeResolutionResult()
    year = reference.year
    month = reference.month
    no_year = False
    input_year = False

    match = regex.search(self.config.simple_cases_regex, source)
    if not match or match.start() != 0 or len(
            match.group()) != len(source):
        return result

    days = RegExpUtility.get_group_list(match, 'day')
    begin_day = self.config.day_of_month[days[0]]
    end_day = self.config.day_of_month[days[1]]

    month_str = RegExpUtility.get_group(match, 'month')
    if month_str.strip() != '':
        month = self.config.month_of_year[month_str]
    else:
        month_str = RegExpUtility.get_group(match, 'relmonth')
        month += self.config.get_swift_day_or_month(month_str)
        # ``reference.month`` is 1-based (1..12). Wrap the shifted month
        # back into that range and carry the overflow into the year, so
        # that December + 1 becomes January of the next year and
        # January - 1 becomes December of the previous year.
        year += (month - 1) // 12
        month = (month - 1) % 12 + 1

    year_str = RegExpUtility.get_group(match, 'year')
    if year_str.strip() != '':
        year = int(year_str)
        input_year = True
    else:
        no_year = True

    # The year is written into the timex only when it was stated or when
    # the month word is reported as "future" by the config; otherwise -1.
    timex_year = year if input_year or self.config.is_future(month_str) else -1
    begin_date_luis = DateTimeFormatUtil.luis_date(
        timex_year, month, begin_day)
    end_date_luis = DateTimeFormatUtil.luis_date(
        timex_year, month, end_day)

    # Without an explicit year, pick the year for each value from where
    # the range start falls relative to the reference.
    future_year = past_year = year
    start_date = DateUtils.safe_create_from_min_value(
        year, month, begin_day)
    if no_year:
        if start_date < reference:
            future_year += 1
        else:
            past_year -= 1

    result.timex = f'({begin_date_luis},{end_date_luis},P{end_day - begin_day}D)'
    result.future_value = [
        DateUtils.safe_create_from_min_value(future_year, month, begin_day),
        DateUtils.safe_create_from_min_value(future_year, month, end_day)
    ]
    result.past_value = [
        DateUtils.safe_create_from_min_value(past_year, month, begin_day),
        DateUtils.safe_create_from_min_value(past_year, month, end_day)
    ]
    result.success = True
    return result
def __init__(self):
    """Build the regexes and regex lists held by this object.

    Members whose implementation is still pending are set to ``None``.
    """
    to_regex = RegExpUtility.get_safe_reg_exp

    self._datetime_period_unit_regex = to_regex(
        ChineseDateTime.DateTimePeriodUnitRegex)
    self._after_regex = to_regex(ChineseDateTime.AfterRegex)
    self._before_regex = to_regex(ChineseDateTime.BeforeRegex)
    self._date_unit_regex = to_regex(ChineseDateTime.UnitRegex)

    # Relative-prefix regexes (next / last / this).
    self._next_prefix_regex = to_regex(ChineseDateTime.NextPrefixRegex)
    self._last_prefix_regex = to_regex(ChineseDateTime.LastPrefixRegex)
    self._this_prefix_regex = to_regex(ChineseDateTime.ThisPrefixRegex)

    self._date_year_in_chinese_regex = to_regex(
        ChineseDateTime.DateYearInCJKRegex)
    self._zero_to_nine_integer_regex_chinese = to_regex(
        ChineseDateTime.ZeroToNineIntegerRegexCJK)
    self._relative_regex = to_regex(ChineseDateTime.RelativeRegex)

    # Year / month / day component regexes.
    self._year_regex = to_regex(ChineseDateTime.YearRegex)
    self._month_num_regex = to_regex(ChineseDateTime.MonthNumRegex)
    self._day_regex_num_in_chinese = to_regex(
        ChineseDateTime.DayRegexNumInCJK)
    self._date_day_regex_in_chinese = to_regex(
        ChineseDateTime.DateDayRegexInCJK)
    self._day_regex = to_regex(ChineseDateTime.DayRegex)
    self._month_regex = to_regex(ChineseDateTime.MonthRegex)

    explicit_date_patterns = (
        ChineseDateTime.DateRegexList1,
        ChineseDateTime.DateRegexList2,
        ChineseDateTime.DateRegexList3,
        ChineseDateTime.DateRegexList4,
        ChineseDateTime.DateRegexList5,
        ChineseDateTime.DateRegexList6,
        ChineseDateTime.DateRegexList7,
        ChineseDateTime.DateRegexList8,
    )
    self._date_regex_list = [
        to_regex(pattern) for pattern in explicit_date_patterns
    ]

    implicit_date_patterns = (
        ChineseDateTime.LunarRegex,
        ChineseDateTime.SpecialDayRegex,
        ChineseDateTime.DateThisRegex,
        ChineseDateTime.DateLastRegex,
        ChineseDateTime.DateNextRegex,
        ChineseDateTime.WeekDayRegex,
        ChineseDateTime.WeekDayOfMonthRegex,
        ChineseDateTime.SpecialDate,
    )
    self._implicit_date_list = [
        to_regex(pattern) for pattern in implicit_date_patterns
    ]

    self._range_connector_symbol_regex = to_regex(
        BaseDateTime.RangeConnectorSymbolRegex)
    self._check_both_before_after = False

    # TODO When the implementation for these properties is added, change
    # the None values to their respective Regexps
    self._since_year_suffix_regex = None
    self._range_unit_regex = None
    self._in_connector_regex = None
    self._less_than_regex = None
    self._more_than_regex = None
    self._year_suffix = None
    self._month_of_year = None
    self._prefix_article_regex = None
    self._week_day_and_day_regex = None
def parse_implicit_date(self, source: str, reference: datetime) -> DateTimeResolutionResult:
    """Resolve an implicit date expression relative to ``reference``.

    The forms below are tried in order; the first one that matches decides
    the result:

    1. A day of month, optionally with relative month / year words
       (``self.special_date_regex``, must span the whole text).
    2. A special-day word (``self.config.special_day_regex``).
    3. "this" / "next" / "last" followed by a weekday.
    4. A bare weekday.

    Args:
        source: Text to parse; surrounding whitespace is ignored.
        reference: Date/time the expression is resolved against.

    Returns:
        A ``DateTimeResolutionResult`` with ``timex``, ``future_value``,
        ``past_value`` and ``success`` set when a form matched; otherwise an
        unpopulated result.
    """
    trimmed_source = source.strip()
    result = DateTimeResolutionResult()

    # handle "on 12"
    match = regex.search(self.special_date_regex, trimmed_source)
    if match and len(match.group()) == len(trimmed_source):
        month = reference.month
        year = reference.year

        year_str = RegExpUtility.get_group(match, 'thisyear')
        month_str = RegExpUtility.get_group(match, 'thismonth')
        day_str = RegExpUtility.get_group(match, 'day')
        # Unknown day text resolves to -1.
        day = self.config.day_of_month.get(day_str, -1)

        has_year = year_str.strip() != ''
        has_month = month_str.strip() != ''

        if has_month:
            # ``reference.month`` is 1-based (1..12), so the roll-over
            # points are 13 (past December) and 0 (before January).
            if regex.search(self.token_next_regex, month_str):
                month += 1
                if month == 13:
                    month = 1
                    year += 1
            elif regex.search(self.token_last_regex, month_str):
                month -= 1
                if month == 0:
                    month = 12
                    year -= 1

        if has_year:
            if regex.search(self.token_next_regex, year_str):
                year += 1
            elif regex.search(self.token_last_regex, year_str):
                year -= 1

        # Components that were not stated are written as -1 in the timex.
        result.timex = FormatUtil.luis_date(year if has_year else -1,
                                            month if has_month else -1, day)

        # NOTE(review): ``month`` is 1-based here; confirm that
        # ``month_max_days`` is indexed the same way.
        if day > self.month_max_days[month]:
            # The day does not exist in the resolved month: use the
            # neighbouring months instead.
            future_date = DateUtils.safe_create_from_min_value(
                year, month + 1, day)
            past_date = DateUtils.safe_create_from_min_value(
                year, month - 1, day)
        else:
            future_date = DateUtils.safe_create_from_min_value(
                year, month, day)
            past_date = DateUtils.safe_create_from_min_value(
                year, month, day)

            if not has_month:
                # Only the day was given: move by one month.
                if future_date < reference:
                    future_date += datedelta(months=1)
                if past_date >= reference:
                    past_date += datedelta(months=-1)
            elif not has_year:
                # Month given but no year: move by one year.
                if future_date < reference:
                    future_date += datedelta(years=1)
                if past_date >= reference:
                    past_date += datedelta(years=-1)

        result.future_value = future_date
        result.past_value = past_date
        result.success = True
        return result

    # handle "today", "the day before yesterday"
    match = regex.match(self.config.special_day_regex, trimmed_source)
    if match and match.start() == 0 and len(
            match.group()) == len(trimmed_source):
        swift = self.config.get_swift_day(match.group())
        value = reference + timedelta(days=swift)

        result.timex = FormatUtil.luis_date_from_datetime(value)
        result.future_value = value
        result.past_value = value
        result.success = True
        return result

    # handle "this Friday", "next Sunday", "last Friday" / "last mon":
    # the three cases differ only in the regex and the DateUtils resolver.
    relative_weekday_cases = (
        (self.config.this_regex, DateUtils.this),
        (self.config.next_regex, DateUtils.next),
        (self.config.last_regex, DateUtils.last),
    )
    for pattern, resolve_weekday in relative_weekday_cases:
        match = regex.match(pattern, trimmed_source)
        if match and match.start() == 0 and len(
                match.group()) == len(trimmed_source):
            weekday_str = RegExpUtility.get_group(match, 'weekday')
            value = resolve_weekday(
                reference, self.config.day_of_week.get(weekday_str))

            result.timex = FormatUtil.luis_date_from_datetime(value)
            result.future_value = value
            result.past_value = value
            result.success = True
            return result

    # handle "Friday"
    match = regex.match(self.config.week_day_regex, trimmed_source)
    if match and match.start() == 0 and len(
            match.group()) == len(trimmed_source):
        weekday_str = RegExpUtility.get_group(match, 'weekday')
        weekday = self.config.day_of_week.get(weekday_str)
        value = DateUtils.this(reference, weekday)

        # Weekday 0 is rewritten as 7 before the isoweekday() comparison
        # and before it is written into the timex.
        if weekday == 0:
            weekday = 7

        if weekday < reference.isoweekday():
            value = DateUtils.next(reference, weekday)

        result.timex = 'XXXX-WXX-' + str(weekday)
        future_date = value
        past_date = value

        if future_date < reference:
            future_date += timedelta(weeks=1)

        if past_date >= reference:
            past_date -= timedelta(weeks=1)

        result.future_value = future_date
        result.past_value = past_date
        result.success = True
        return result

    return result
def __init__(self):
    """Set up the regexes, lookup maps and number helpers used to parse Chinese dates."""
    compile_regex = RegExpUtility.get_safe_reg_exp
    fallback = ChineseDateTime.DefaultLanguageFallback

    # DateRegexList8 (e.g. 2015-12-23) is the standard Chinese YMD format and
    # takes precedence over the orderings appended below.
    patterns = [
        ChineseDateTime.DateRegexList1,
        ChineseDateTime.DateRegexList2,
        ChineseDateTime.DateRegexList3,
        ChineseDateTime.DateRegexList8,
    ]

    # Precedence between the D/M orderings is controlled by DefaultLanguageFallback.
    if fallback == Constants.DEFAULT_LANGUAGE_FALLBACK_DMY:
        patterns += [ChineseDateTime.DateRegexList5,
                     ChineseDateTime.DateRegexList4]
    else:
        patterns += [ChineseDateTime.DateRegexList4,
                     ChineseDateTime.DateRegexList5]

    if fallback in (Constants.DEFAULT_LANGUAGE_FALLBACK_DMY,
                    Constants.DEFAULT_LANGUAGE_FALLBACK_YMD):
        patterns += [ChineseDateTime.DateRegexList7,
                     ChineseDateTime.DateRegexList6]
    else:
        patterns += [ChineseDateTime.DateRegexList6,
                     ChineseDateTime.DateRegexList7]

    self._date_regex = [compile_regex(pattern) for pattern in patterns]

    # Lookup tables.
    self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
    self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
    self._day_of_week = ChineseDateTime.ParserConfigurationDayOfWeek
    self._unit_map = ChineseDateTime.ParserConfigurationUnitMap
    self._dynasty_year_map = ChineseDateTime.DynastyYearMap
    self._dynasty_start_year = ChineseDateTime.DynastyStartYear

    # Single-purpose regexes.
    self._special_day_regex = compile_regex(ChineseDateTime.SpecialDayRegex)
    self._special_day_with_num_regex = compile_regex(
        ChineseDateTime.SpecialDayWithNumRegex)
    self._this_regex = compile_regex(ChineseDateTime.DateThisRegex)
    self._next_regex = compile_regex(ChineseDateTime.DateNextRegex)
    self._last_regex = compile_regex(ChineseDateTime.DateLastRegex)
    self._unit_regex = compile_regex(ChineseDateTime.DateUnitRegex)
    self._week_day_of_month_regex = compile_regex(
        ChineseDateTime.WeekDayOfMonthRegex)
    self._week_day_regex = compile_regex(ChineseDateTime.WeekDayRegex)
    self._dynasty_year_regex = compile_regex(ChineseDateTime.DynastyYearRegex)

    # Number extraction / parsing helpers.
    self._integer_extractor = ChineseIntegerExtractor()
    self._number_parser = CJKNumberParser(ChineseNumberParserConfiguration())
    self._date_extractor = None
def __init__(self):
    """Compile the regexes and create the sub-extractors used for Chinese date-time periods."""
    super().__init__()
    compile_regex = RegExpUtility.get_safe_reg_exp

    self._past_regex = compile_regex(ChineseDateTime.PastRegex)
    self._future_regex = compile_regex(ChineseDateTime.FutureRegex)
    self._number_combined_with_unit = compile_regex(
        ChineseDateTime.NumberCombinedWithUnit)
    self._this_regex = compile_regex(ChineseDateTime.DateThisRegex)
    self._last_regex = compile_regex(ChineseDateTime.DateLastRegex)
    self._next_regex = compile_regex(ChineseDateTime.DateNextRegex)
    self._zhijian_regex = compile_regex(ChineseDateTime.ZhijianRegex)
    self._hour_num_regex = compile_regex(ChineseDateTime.HourNumRegex)
    self._hour_regex = compile_regex(ChineseDateTime.HourRegex)
    self._preposition_regex = compile_regex(
        ChineseDateTime.DateTimePeriodPrepositionRegex)
    self._till_regex = compile_regex(ChineseDateTime.DateTimePeriodTillRegex)
    self._specific_time_of_day_regex = compile_regex(
        ChineseDateTime.SpecificTimeOfDayRegex)
    self._time_of_day_regex = compile_regex(ChineseDateTime.TimeOfDayRegex)
    self._followed_unit = compile_regex(
        ChineseDateTime.DateTimePeriodFollowedUnit)
    self._time_unit_regex = compile_regex(
        ChineseDateTime.DateTimePeriodUnitRegex)

    # Sub-extractors this configuration exposes.
    self._cardinal_extractor = ChineseCardinalExtractor()
    self._single_date_extractor = ChineseDateExtractor()
    self._single_time_extractor = ChineseTimeExtractor()
    self._single_date_time_extractor = ChineseDateTimeExtractor()

    # TODO When the implementation for these properties is added, change the
    # None values to their respective Regexps
    for placeholder in (
            '_check_both_before_after',
            '_suffix_regex',
            '_after_regex',
            '_before_regex',
            '_prefix_day_regex',
            '_pm_desc_regex',
            '_am_desc_regex',
            '_date_unit_regex',
            '_future_suffix_regex',
            '_within_next_prefix_regex',
            '_token_before_date',
    ):
        setattr(self, placeholder, None)
def __init__(self, regex_true, regex_false, token_regex, only_top_match):
    """Compile the three given patterns and remember the ``only_top_match`` flag.

    :param regex_true: pattern compiled into ``self.regex_true``.
    :param regex_false: pattern compiled into ``self.regex_false``.
    :param token_regex: pattern compiled into ``self.token_regex``.
    :param only_top_match: stored unchanged on the instance.
    """
    compiled = [
        RegExpUtility.get_safe_reg_exp(pattern)
        for pattern in (regex_true, regex_false, token_regex)
    ]
    self.regex_true, self.regex_false, self.token_regex = compiled
    self.only_top_match = only_top_match
def __init__(self, options: DateTimeOptions):
    """Initialise the base extractor with the Chinese merged configuration.

    :param options: options forwarded unchanged to the base class.
    """
    configuration = ChineseMergedExtractorConfiguration()
    super().__init__(configuration, options)

    # One or two digits followed by "号" at the very start of the text.
    day_of_month_pattern = '^\\d{1,2}号'
    self.day_of_month_regex = RegExpUtility.get_safe_reg_exp(
        day_of_month_pattern, regex.I)
def __init__(self):
    """Create the helper extractors/parsers, regexes and maps for Chinese date periods."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    # Helper components.
    self._date_extractor = ChineseDateExtractor()
    self._date_parser = ChineseDateParser()
    self._duration_extractor = ChineseDurationExtractor()

    # Regexes taken from the Chinese resource definitions.
    self._simple_cases_regex = compile_regex(ChineseDateTime.SimpleCasesRegex)
    self._one_word_period_regex = compile_regex(
        ChineseDateTime.OneWordPeriodRegex)
    self._year_regex = compile_regex(ChineseDateTime.DatePeriodYearRegex)
    self._past_regex = compile_regex(ChineseDateTime.PastRegex)
    self._future_regex = compile_regex(ChineseDateTime.FutureRegex)
    self._week_of_month_regex = compile_regex(
        ChineseDateTime.WeekOfMonthRegex)
    self._quarter_regex = compile_regex(ChineseDateTime.QuarterRegex)
    self._season_regex = compile_regex(ChineseDateTime.SeasonRegex)
    self._next_prefix_regex = compile_regex(
        ChineseDateTime.DatePeriodNextRegex)
    self._past_prefix_regex = compile_regex(
        ChineseDateTime.DatePeriodLastRegex)
    self._this_prefix_regex = compile_regex(
        ChineseDateTime.DatePeriodThisRegex)

    # These two are compiled from the literal pattern r'\0' rather than a resource.
    self._later_early_period_regex = compile_regex(r'\0')
    self._week_with_week_day_range_regex = compile_regex(r'\0')

    self._token_before_date = ' on '

    # Lookup tables.
    self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
    self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
    self._cardinal_map = ChineseDateTime.ParserConfigurationCardinalMap
    self._season_map = ChineseDateTime.ParserConfigurationSeasonMap
    self._unit_map = ChineseDateTime.ParserConfigurationUnitMap
def __init__(self):
    """Compile the regexes and create the helpers used to extract Chinese date periods.

    Attributes that have no Chinese implementation yet are initialised to
    ``None`` (see the TODO below).
    """
    compile_regex = RegExpUtility.get_safe_reg_exp

    self._season_regex = compile_regex(ChineseDateTime.SeasonRegex)
    self._month_suffix_regex = compile_regex(ChineseDateTime.MonthSuffixRegex)
    self._year_regex_in_number = compile_regex(
        ChineseDateTime.YearRegexInNumber)
    self._strict_year_regex = compile_regex(ChineseDateTime.StrictYearRegex)
    self._last_regex = compile_regex(ChineseDateTime.DatePeriodLastRegex)
    self._next_regex = compile_regex(ChineseDateTime.DatePeriodNextRegex)
    self._this_regex = compile_regex(ChineseDateTime.DatePeriodThisRegex)
    self._month_regex = compile_regex(ChineseDateTime.MonthRegex)
    self._zero_to_nine_integer_regex_chinese = compile_regex(
        ChineseDateTime.ZeroToNineIntegerRegexCJK)
    self._relative_month_regex = compile_regex(
        ChineseDateTime.RelativeMonthRegex)
    self._day_regex_in_chinese = compile_regex(
        ChineseDateTime.DatePeriodDayRegexInCJK)
    self._day_regex = compile_regex(ChineseDateTime.DayRegex)

    # Order is preserved exactly as declared here.
    self._simple_cases_regexes = [
        compile_regex(pattern) for pattern in (
            ChineseDateTime.SimpleCasesRegex,
            ChineseDateTime.OneWordPeriodRegex,
            ChineseDateTime.StrictYearRegex,
            ChineseDateTime.YearToYear,
            ChineseDateTime.YearToYearSuffixRequired,
            ChineseDateTime.MonthToMonth,
            ChineseDateTime.MonthToMonthSuffixRequired,
            ChineseDateTime.YearAndMonth,
            ChineseDateTime.PureNumYearAndMonth,
            ChineseDateTime.DatePeriodYearInCJKRegex,
            ChineseDateTime.WeekOfMonthRegex,
            ChineseDateTime.SeasonWithYear,
            ChineseDateTime.QuarterRegex,
            ChineseDateTime.DecadeRegex,
        )
    ]

    self._illegal_year_regex = compile_regex(BaseDateTime.IllegalYearRegex)
    self._year_regex = compile_regex(ChineseDateTime.YearRegex)
    self._till_regex = compile_regex(ChineseDateTime.DatePeriodTillRegex)
    self._followed_unit = compile_regex(ChineseDateTime.FollowedUnit)
    self._number_combined_with_unit = compile_regex(
        ChineseDateTime.NumberCombinedWithUnit)
    self._past_regex = compile_regex(ChineseDateTime.PastRegex)
    self._future_regex = compile_regex(ChineseDateTime.FutureRegex)
    self._now_regex = compile_regex(ChineseDateTime.NowRegex)
    self._month_num_regex = compile_regex(ChineseDateTime.MonthNumRegex)

    # Helper extractors / parsers.
    self._date_point_extractor = ChineseDateExtractor()
    self._integer_extractor = ChineseNumberExtractor()
    self._number_parser = BaseNumberParser(
        ChineseNumberParserConfiguration())
    self._cardinal_extractor = ChineseCardinalExtractor()
    self._ordinal_extractor = ChineseOrdinalExtractor()

    # TODO When the implementation for these properties is added, change the
    # None values to their respective Regexps
    # (the original assigned _previous_prefix_regex twice; once is enough).
    self._previous_prefix_regex = None
    self._check_both_before_after = None
    self._century_suffix_regex = None
    self._year_period_regex = None
    self._duration_date_restrictions = None
    self._more_than_regex = None
    self._less_than_regex = None
    self._later_regex = None
    self._ago_regex = None
    self._future_suffix_regex = None
    self._within_next_prefix_regex = None
    self._time_unit_regex = None
def __init__(self):
    """Create the helpers, regexes and lookup maps used to parse Chinese date periods."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    # Helper components.
    self._date_extractor = ChineseDateExtractor()
    self._date_parser = ChineseDateParser()
    self._duration_extractor = ChineseDurationExtractor()

    # Regexes taken from the Chinese resource definitions.
    self._relative_regex = compile_regex(ChineseDateTime.RelativeRegex)
    self._simple_cases_regex = compile_regex(ChineseDateTime.SimpleCasesRegex)
    self._one_word_period_regex = compile_regex(
        ChineseDateTime.OneWordPeriodRegex)
    self._year_regex = compile_regex(ChineseDateTime.YearRegex)
    self._past_regex = compile_regex(ChineseDateTime.PastRegex)
    self._future_regex = compile_regex(ChineseDateTime.FutureRegex)
    self._week_of_month_regex = compile_regex(
        ChineseDateTime.WeekOfMonthRegex)
    self._quarter_regex = compile_regex(ChineseDateTime.QuarterRegex)
    self._season_regex = compile_regex(ChineseDateTime.SeasonRegex)
    self._next_prefix_regex = compile_regex(
        ChineseDateTime.DatePeriodNextRegex)
    self._past_prefix_regex = compile_regex(
        ChineseDateTime.DatePeriodLastRegex)
    self._this_prefix_regex = compile_regex(
        ChineseDateTime.DatePeriodThisRegex)
    self._now_regex = compile_regex(ChineseDateTime.NowRegex)

    # Compiled from the literal pattern r'\0' rather than a resource.
    self._later_early_period_regex = compile_regex(r'\0')
    self._week_with_week_day_range_regex = compile_regex(r'\0')

    self._token_before_date = ' on '

    # Lookup tables.
    self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
    self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
    self._cardinal_map = ChineseDateTime.ParserConfigurationCardinalMap
    self._season_map = ChineseDateTime.ParserConfigurationSeasonMap
    self._unit_map = ChineseDateTime.ParserConfigurationUnitMap

    # TODO When the implementation for these properties is added, change the
    # None values to their respective Regexps
    self._reference_date_period_regex = compile_regex(r'\0')
    self._complex_dateperiod_regex = None
    self._relative_decade_regex = None
    self._decade_with_century_regex = None
    self._later_regex = None
    self._ago_regex = None
def __init__(self):
    """Initialise the base parser with the Chinese date-period configuration
    and compile the extra regexes this parser uses directly."""
    super().__init__(ChineseDatePeriodParserConfiguration())
    compile_regex = RegExpUtility.get_safe_reg_exp

    # Number helpers.
    self.integer_extractor = ChineseIntegerExtractor()
    self.number_parser = CJKNumberParser(ChineseNumberParserConfiguration())

    # Note: year_in_chinese_regex and chinese_year_regex are both built from
    # the same resource pattern (DatePeriodYearInChineseRegex).
    self.year_in_chinese_regex = compile_regex(
        ChineseDateTime.DatePeriodYearInChineseRegex)
    self.chinese_year_regex = compile_regex(
        ChineseDateTime.DatePeriodYearInChineseRegex)

    self.number_combined_with_unit_regex = compile_regex(
        ChineseDateTime.NumberCombinedWithUnit)
    self.unit_regex = compile_regex(ChineseDateTime.UnitRegex)
    self.year_and_month_regex = compile_regex(ChineseDateTime.YearAndMonth)
    self.pure_number_year_and_month_regex = compile_regex(
        ChineseDateTime.PureNumYearAndMonth)
    self.year_to_year_regex = compile_regex(ChineseDateTime.YearToYear)
    self.year_to_year_suffix_required = compile_regex(
        ChineseDateTime.YearToYearSuffixRequired)
    self.season_with_year_regex = compile_regex(
        ChineseDateTime.SeasonWithYear)
    self.decade_regex = compile_regex(ChineseDateTime.DecadeRegex)

    # "this" / "last" / "next" period prefixes.
    self.date_this_regex = compile_regex(ChineseDateTime.DatePeriodThisRegex)
    self.date_last_regex = compile_regex(ChineseDateTime.DatePeriodLastRegex)
    self.date_next_regex = compile_regex(ChineseDateTime.DatePeriodNextRegex)
class ChineseDateTimeExtractor(BaseDateTimeExtractor):
    """Extractor that locates date-time expressions in Chinese text.

    Candidate spans come from four sources (see ``extract``) and are merged
    into the final result list by ``merge_all_tokens``.
    """

    before_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.BeforeRegex)
    after_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.AfterRegex)
    date_time_period_unit_regex = RegExpUtility.get_safe_reg_exp(
        ChineseDateTime.DateTimePeriodUnitRegex)

    def __init__(self):
        super().__init__(ChineseDateTimeExtractorConfiguration())
        self.duration_extractor = ChineseDurationExtractor()

    def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]:
        """Return the date-time expressions found in ``source``.

        :param source: text to scan.
        :param reference: reference date; defaults to ``datetime.now()``.
        :return: merged extraction results.
        """
        if reference is None:
            reference = datetime.now()

        tokens: List[Token] = list()
        tokens.extend(self.merge_date_and_time(source, reference))
        tokens.extend(self.basic_regex_match(source))
        tokens.extend(self.time_of_today(source, reference))
        tokens.extend(self.duration_with_ago_and_later(source, reference))

        return merge_all_tokens(tokens, source, self.extractor_type_name)

    def merge_date_and_time(self, source: str, reference: datetime) -> List[Token]:
        """Join a date result with the following time result when only a
        connector token separates them.

        :return: one token per merged (date, time) pair.
        """
        tokens: List[Token] = list()

        ers: List[ExtractResult] = self.config.date_point_extractor.extract(
            source, reference)
        if len(ers) < 1:
            return tokens

        ers.extend(self.config.time_point_extractor.extract(source, reference))
        if len(ers) < 2:
            return tokens

        ers = sorted(ers, key=lambda x: x.start)

        i = 0
        while i < len(ers) - 1:
            # Skip every result that overlaps the current one.
            j = i + 1
            while j < len(ers) and ers[i].overlap(ers[j]):
                j += 1
            if j >= len(ers):
                break

            # Compare type names by value: ``is`` on strings only works when
            # both sides happen to be the very same object.
            if ers[i].type == Constants.SYS_DATETIME_DATE \
                    and ers[j].type == Constants.SYS_DATETIME_TIME:
                middle_begin = ers[i].start + ers[i].length
                middle_end = ers[j].start

                # The original did a bare ``continue`` when
                # middle_begin > middle_end, which never advanced ``i`` and
                # would spin forever; such a pair now just falls through to
                # ``i = j`` below.
                if middle_begin <= middle_end:
                    middle = source[middle_begin:middle_end].strip().lower()
                    if self.config.is_connector_token(middle):
                        begin = ers[i].start
                        end = ers[j].start + ers[j].length
                        tokens.append(Token(begin, end))
                        i = j + 1
                        continue

            i = j

        return tokens

    def time_of_today(self, source: str, reference: datetime) -> List[Token]:
        """Extend a time result leftwards when the text right before it
        matches ``time_of_today_before_regex``.

        :return: one token per time result that has such a prefix.
        """
        tokens: List[Token] = list()
        ers = self.config.time_point_extractor.extract(source, reference)

        for er in ers:
            before = source[:er.start]

            # If the time text itself starts with a night_regex match, include
            # that leading part in the text examined for the prefix.
            inner_match = regex.search(self.config.night_regex, er.text)
            if inner_match is not None and inner_match.start() == 0:
                before = source[:er.start + len(inner_match.group())]

            if not before:
                continue

            match = regex.search(self.config.time_of_today_before_regex, before)
            # Only accept a prefix match that reaches the end of ``before``
            # (ignoring whitespace).
            if match is not None and not before[match.end():].strip():
                begin = match.start()
                end = er.start + er.length
                tokens.append(Token(begin, end))

        return tokens

    def duration_with_ago_and_later(self, source: str, reference: datetime) -> List[Token]:
        """Find durations immediately followed by a single character matching
        ``before_regex`` or ``after_regex``.

        :return: tokens covering the duration plus that one character, tagged
            with ``is_duration_with_ago_and_later`` metadata.
        """
        ret: List[Token] = list()
        duration_er = self.duration_extractor.extract(source, reference)

        for er in duration_er:
            pos = er.start + er.length
            if pos < len(source):
                # Only the single character right after the duration is tested.
                suffix = source[pos]
                before_match = RegExpUtility.get_matches(
                    self.before_regex, suffix)
                after_match = RegExpUtility.get_matches(
                    self.after_regex, suffix)

                if (before_match and suffix.startswith(before_match[0])) \
                        or (after_match and suffix.startswith(after_match[0])):
                    meta_data = MetaData()
                    meta_data.is_duration_with_ago_and_later = True
                    ret.append(Token(er.start, pos + 1, meta_data))

        return ret
def _parse_decade(self, source: str, reference: datetime) -> DateTimeResolutionResult:
    """Resolve a decade expression into a ten-year range.

    ``source`` must be matched in full by ``self.decade_regex``; otherwise an
    unsuccessful result is returned. The century is taken from the text when
    present (explicit or relative via the this/next/last regexes), else from
    ``reference``.

    :param source: text to parse.
    :param reference: reference date used for the default century and for
        choosing the future/past resolutions.
    :return: result whose ``timex`` is ``(begin,end,P10Y)`` and whose
        ``future_value`` / ``past_value`` are ``[start, end]`` pairs.
    """
    result = DateTimeResolutionResult()
    century = reference.year // 100 + 1
    decade_last_year = 10
    input_century = False

    match = regex.search(self.decade_regex, source)
    if not match or len(match.group()) != len(source):
        return result

    decade_str = RegExpUtility.get_group(match, Constants.DECADE)
    decade = self.__convert_chinese_to_number(decade_str)

    century_str = RegExpUtility.get_group(match, Constants.CENTURY)
    if century_str != "":
        century = self.__convert_chinese_to_number(century_str)
        input_century = True
    else:
        century_str = RegExpUtility.get_group(match, Constants.REL_CENTURY)
        if century_str != "":
            century_str = century_str.strip().lower()
            next_match = regex.search(self.date_next_regex, century_str)
            last_match = regex.search(self.date_last_regex, century_str)

            # Any other relative century text leaves the reference century as is.
            if next_match:
                century += 1
            elif last_match:
                century -= 1

            input_century = True

    begin_year = ((century - 1) * 100) + decade
    end_year = begin_year + decade_last_year

    if input_century:
        begin_luis_str = DateTimeFormatUtil.luis_date(begin_year, 1, 1)
        end_luis_str = DateTimeFormatUtil.luis_date(end_year, 1, 1)
    else:
        # Century unknown: keep "XX" in place of the century digits.
        begin_year_str = "XX{:02d}".format(decade)
        begin_luis_str = DateTimeFormatUtil.luis_date(-1, 1, 1)
        begin_luis_str = begin_luis_str.replace("XXXX", begin_year_str)

        end_year_str = "XX{:02d}".format(end_year % 100)
        end_luis_str = DateTimeFormatUtil.luis_date(-1, 1, 1)
        end_luis_str = end_luis_str.replace("XXXX", end_year_str)

    result.timex = f"({begin_luis_str},{end_luis_str},P10Y)"

    future_year, past_year = begin_year, begin_year
    start_date = DateUtils.safe_create_from_min_value(begin_year, 1, 1)

    # Without an explicit century, shift by 100 years so the future value is
    # not before the reference and the past value is not after it.
    if not input_century and start_date < reference:
        future_year += 100
    if not input_century and start_date >= reference:
        past_year -= 100

    result.future_value = [
        DateUtils.safe_create_from_min_value(future_year, 1, 1),
        DateUtils.safe_create_from_min_value(
            future_year + decade_last_year, 1, 1)
    ]
    result.past_value = [
        DateUtils.safe_create_from_min_value(past_year, 1, 1),
        DateUtils.safe_create_from_min_value(
            past_year + decade_last_year, 1, 1)
    ]
    result.success = True

    return result
def __init__(self):
    """Compile the holiday patterns, in declaration order, into ``_holiday_regexes``."""
    holiday_patterns = (
        ChineseDateTime.HolidayRegexList1,
        ChineseDateTime.HolidayRegexList2,
        ChineseDateTime.LunarHolidayRegex,
    )
    self._holiday_regexes = [
        RegExpUtility.get_safe_reg_exp(pattern) for pattern in holiday_patterns
    ]
def parser_duration_with_ago_and_later(
        self, source: str, reference: datetime) -> DateTimeResolutionResult:
    """Resolve "<duration> before/after" expressions to a single date.

    The last duration found in ``source`` is combined with the text that
    follows it: if that suffix starts with a ``before_regex`` match the
    duration is subtracted from ``reference``; if it starts with an
    ``after_regex`` match it is added.

    :param source: text to parse.
    :param reference: date the offset is applied to.
    :return: successful result with ``timex``/``future_value``/``past_value``
        set to the shifted date, or an unsuccessful result when nothing
        applicable is found or the unit is not day/week/month/year.
    :raises ValueError: if the shifted year falls outside what ``datetime``
        supports.
    """
    import calendar

    def shift_months(moment: datetime, months: int) -> datetime:
        # Move by whole months, clamping the day to the target month's length
        # so that e.g. 31 March minus one month yields the last day of February.
        month_index = moment.year * 12 + (moment.month - 1) + int(months)
        year, zero_based_month = divmod(month_index, 12)
        month = zero_based_month + 1
        day = min(moment.day, calendar.monthrange(year, month)[1])
        return moment.replace(year=year, month=month, day=day)

    result = DateTimeResolutionResult()

    # Run the extractor once (the original ran it twice) and use the last hit.
    durations = self.duration_extractor.extract(source, reference)
    if not durations:
        return result
    duration_res = durations[-1]

    match = self.config._unit_regex.search(source)
    if not match:
        return result

    suffix = source[duration_res.start + duration_res.length:]
    src_unit = RegExpUtility.get_group(match, 'unit')

    # The number is the text between the start of the duration and the unit.
    # (The original sliced with ``match.lastindex``, which is a group number,
    # not a position in the string.)
    number_str = source[duration_res.start:match.start()]
    number = self.parse_chinese_written_number_to_value(number_str)

    if src_unit not in self.config.unit_map:
        return result
    unit_str = self.config.unit_map.get(src_unit)

    # Decide the direction from the text right after the duration.
    before_match = RegExpUtility.get_matches(
        ChineseDateExtractor.before_regex, suffix)
    if before_match and suffix.startswith(before_match[0]):
        sign = -1
    else:
        after_match = RegExpUtility.get_matches(
            ChineseDateExtractor.after_regex, suffix)
        if after_match and suffix.startswith(after_match[0]):
            sign = 1
        else:
            return result

    offset = sign * number
    if unit_str == Constants.TIMEX_DAY:
        date = reference + timedelta(days=offset)
    elif unit_str == Constants.TIMEX_WEEK:
        date = reference + timedelta(days=7 * offset)
    elif unit_str == Constants.TIMEX_MONTH_FULL:
        # Honour the parsed number and roll over year boundaries.
        date = shift_months(reference, offset)
    elif unit_str == Constants.TIMEX_YEAR:
        # Twelve months per year; also clamps 29 February in non-leap years.
        date = shift_months(reference, 12 * offset)
    else:
        return result

    result.timex = DateTimeFormatUtil.luis_date_from_datetime(date)
    result.future_value = result.past_value = date
    result.success = True
    return result
def __init__(self):
    """Compile the explicit-date and implicit-date regex lists for Chinese date extraction."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    # DateRegexList6/7 swap places depending on the default language fallback.
    if ChineseDateTime.DefaultLanguageFallback == Constants.DEFAULT_LANGUAGE_FALLBACK_DMY:
        fallback_dependent = [ChineseDateTime.DateRegexList7,
                              ChineseDateTime.DateRegexList6]
    else:
        fallback_dependent = [ChineseDateTime.DateRegexList6,
                              ChineseDateTime.DateRegexList7]

    explicit_patterns = [
        ChineseDateTime.DateRegexList1,
        ChineseDateTime.DateRegexList2,
        ChineseDateTime.DateRegexList3,
        ChineseDateTime.DateRegexList4,
        ChineseDateTime.DateRegexList5,
        *fallback_dependent,
        ChineseDateTime.DateRegexList8,
    ]
    self._date_regex_list = [
        compile_regex(pattern) for pattern in explicit_patterns
    ]

    implicit_patterns = (
        ChineseDateTime.LunarRegex,
        ChineseDateTime.SpecialDayRegex,
        ChineseDateTime.DateThisRegex,
        ChineseDateTime.DateLastRegex,
        ChineseDateTime.DateNextRegex,
        ChineseDateTime.WeekDayRegex,
        ChineseDateTime.WeekDayOfMonthRegex,
        ChineseDateTime.SpecialDate,
    )
    self._implicit_date_list = [
        compile_regex(pattern) for pattern in implicit_patterns
    ]
def __init__(self):
    """Initialise the base parser with the Chinese time-period configuration
    and load the regexes and maps this parser uses directly."""
    configuration = ChineseTimePeriodParserConfiguration()
    super().__init__(configuration)

    compile_regex = RegExpUtility.get_safe_reg_exp
    self.day_description_regex = compile_regex(
        ChineseDateTime.TimeDayDescRegex)
    # Matches a run of one or more digits.
    self.only_digit_match = compile_regex(r'\d+')

    # Lookup tables from the Chinese resource definitions.
    self.numbers_map = ChineseDateTime.TimeNumberDictionary
    self.low_bound_map = ChineseDateTime.TimeLowBoundDesc
def __init__(self):
    """Initialise the base class with a mapping from each compiled time regex
    to the ``TimeType`` it stands for."""
    compile_regex = RegExpUtility.get_safe_reg_exp
    regex_to_time_type = {
        compile_regex(ChineseDateTime.TimeRegexes1): TimeType.ChineseTime,
        compile_regex(ChineseDateTime.TimeRegexes2): TimeType.DigitTime,
        compile_regex(ChineseDateTime.TimeRegexes3): TimeType.LessTime,
    }
    super().__init__(regex_to_time_type)