def __init__(self):
    """Hand the base class a map of compiled time-period regexes to period types."""
    compile_regex = RegExpUtility.get_safe_reg_exp
    super().__init__({
        compile_regex(ChineseDateTime.TimePeriodRegexes1):
            TimePeriodType.FullTime,
        compile_regex(ChineseDateTime.TimePeriodRegexes2):
            TimePeriodType.ShortTime,
    })
def __init__(self):
    """Initialise the base parser and cache the regexes and lookup tables used here."""
    super().__init__(ChineseTimePeriodParserConfiguration())

    compile_regex = RegExpUtility.get_safe_reg_exp
    self.day_description_regex = compile_regex(
        ChineseDateTime.TimeDayDescRegex)
    # Matches a run of one or more digits.
    self.only_digit_match = compile_regex(r'\d+')

    # Lookup tables taken directly from the Chinese resource definitions.
    self.numbers_map = ChineseDateTime.TimeNumberDictionary
    self.low_bound_map = ChineseDateTime.TimeLowBoundDesc
def __init__(self):
    """Initialise the base extractor and compile the zhijian/past/future regexes."""
    super().__init__(ChineseDateTimePeriodExtractorConfiguration())

    compile_regex = RegExpUtility.get_safe_reg_exp
    self.zhijian_regex = compile_regex(ChineseDateTime.ZhijianRegex)
    self.past_regex = compile_regex(ChineseDateTime.PastRegex)
    self.future_regex = compile_regex(ChineseDateTime.FutureRegex)
def __init__(self):
    """Initialise the base class without a configuration and build the duration helpers."""
    super().__init__(None)

    # Durations are located through a number-with-unit extractor.
    duration_config = ChineseDurationExtractorConfiguration()
    self.extractor = NumberWithUnitExtractor(duration_config)

    compile_regex = RegExpUtility.get_safe_reg_exp
    self.year_regex = compile_regex(ChineseDateTime.DurationYearRegex)
    self.half_suffix_regex = compile_regex(
        ChineseDateTime.DurationHalfSuffixRegex)
class ChineseDateExtractor(BaseDateExtractor):
    """Date extractor that merges regex dates, implicit dates and relative durations.

    The three class-level regexes are compiled once when the class is defined.
    """

    before_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.BeforeRegex)
    after_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.AfterRegex)
    date_time_period_unit_regex = RegExpUtility.get_safe_reg_exp(
        ChineseDateTime.DateTimePeriodUnitRegex)

    def __init__(self):
        """Initialise the base extractor and create the duration extractor."""
        super().__init__(ChineseDateExtractorConfiguration())
        self.duration_extractor = ChineseDurationExtractor()

    def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]:
        """Return the merged extraction results found in ``source``.

        ``reference`` defaults to ``datetime.now()`` when not supplied.
        """
        if reference is None:
            reference = datetime.now()

        candidates = [
            *self.basic_regex_match(source),
            *self.implicit_date(source),
            *self.relative_duration_date(source, reference),
        ]
        return merge_all_tokens(candidates, source, self.extractor_type_name)

    def relative_duration_date(self, source: str, reference: datetime) -> List[Token]:
        """Return tokens for durations directly followed by a before/after character.

        Durations whose text matches ``date_time_period_unit_regex`` are
        skipped. Each token spans the duration plus the one following
        character and is flagged ``is_duration_with_ago_and_later``.
        """
        found: List[Token] = []

        for duration in self.duration_extractor.extract(source, reference):
            if regex.search(self.date_time_period_unit_regex, duration.text):
                continue

            tail_index = duration.start + duration.length
            if tail_index >= len(source):
                # Nothing follows the duration, so no marker can be present.
                continue

            # Only the single character right after the duration is examined.
            next_char = source[tail_index]
            before_hits = RegExpUtility.get_matches(self.before_regex, next_char)
            after_hits = RegExpUtility.get_matches(self.after_regex, next_char)

            if not ((before_hits and next_char.startswith(before_hits[0]))
                    or (after_hits and next_char.startswith(after_hits[0]))):
                continue

            marker_info = MetaData()
            marker_info.is_duration_with_ago_and_later = True
            found.append(Token(duration.start, tail_index + 1, marker_info))

        return found

    def basic_regex_match(self, source: str) -> List[Token]:
        """Return the tokens produced by every regex in ``config.date_regex_list``."""
        return [
            token
            for pattern in self.config.date_regex_list
            for token in get_tokens_from_regex(pattern, source)
        ]

    def implicit_date(self, source: str) -> List[Token]:
        """Return the tokens produced by every regex in ``config.implicit_date_list``."""
        return [
            token
            for pattern in self.config.implicit_date_list
            for token in get_tokens_from_regex(pattern, source)
        ]
def __init__(self):
    """Hand the base class a map of compiled time regexes to time types."""
    compile_regex = RegExpUtility.get_safe_reg_exp
    super().__init__({
        compile_regex(ChineseDateTime.TimeRegexes1): TimeType.ChineseTime,
        compile_regex(ChineseDateTime.TimeRegexes2): TimeType.DigitTime,
        compile_regex(ChineseDateTime.TimeRegexes3): TimeType.LessTime,
    })
def __init__(self):
    """Initialise the base class for the Chinese culture and set the duration settings."""
    super().__init__(CultureInfo(Culture.Chinese))

    compile_regex = RegExpUtility.get_safe_reg_exp
    self._year_regex = compile_regex(ChineseDateTime.DurationYearRegex)
    self._half_suffix_regex = compile_regex(
        ChineseDateTime.DurationHalfSuffixRegex)

    self._extract_type = Constants.SYS_DATETIME_DURATION
    self._suffix_list = ChineseDateTime.DurationSuffixList
    # No prefixes are configured: the prefix table starts out empty.
    self._prefix_list = {}
    self._ambiguous_unit_list = ChineseDateTime.DurationAmbiguousUnits
def __init__(self):
    """Create the date/time point extractors and compile the supporting regexes."""
    self._date_point_extractor = ChineseDateExtractor()
    self._time_point_extractor = ChineseTimeExtractor()

    compile_regex = RegExpUtility.get_safe_reg_exp
    self._now_regex = compile_regex(ChineseDateTime.NowRegex)
    self._night_regex = compile_regex(ChineseDateTime.NightRegex)
    self._time_of_today_before_regex = compile_regex(
        ChineseDateTime.TimeOfTodayRegex)
    self._preposition_regex = compile_regex(ChineseDateTime.PrepositionRegex)
def __init__(self):
    """Initialise the base parser, compile the date regexes and set the month lengths."""
    super().__init__(ChineseDateParserConfiguration())

    compile_regex = RegExpUtility.get_safe_reg_exp
    self.lunar_regex = compile_regex(ChineseDateTime.LunarRegex)
    self.special_date_regex = compile_regex(ChineseDateTime.SpecialDate)
    self.token_next_regex = compile_regex(ChineseDateTime.DateNextRe)
    self.token_last_regex = compile_regex(ChineseDateTime.DateLastRe)

    # Twelve entries, one per month; the second entry is 29.
    self.month_max_days: List[int] = [
        31, 29, 31, 30, 31, 30,
        31, 31, 30, 31, 30, 31,
    ]
def __init__(self):
    """Compile the set-related regexes and create the extractors this configuration holds."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    self._last_regex = compile_regex(ChineseDateTime.SetLastRegex)
    self._each_prefix_regex = compile_regex(ChineseDateTime.SetEachPrefixRegex)
    self._each_unit_regex = compile_regex(ChineseDateTime.SetEachUnitRegex)
    self._each_day_regex = compile_regex(ChineseDateTime.SetEachDayRegex)
    # NOTE(review): compiled from the same SetEachDayRegex pattern as
    # _each_day_regex - confirm that sharing the pattern is intended.
    self._before_each_day_regex = compile_regex(
        ChineseDateTime.SetEachDayRegex)

    self._duration_extractor = ChineseDurationExtractor()
    self._time_extractor = ChineseTimeExtractor()
    self._date_extractor = ChineseDateExtractor()
    self._date_time_extractor = ChineseDateTimeExtractor()
def __init__(self):
    """Compile the merged before/after/since regexes and create one parser per entity kind."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    self._before_regex = compile_regex(ChineseDateTime.MergedBeforeRegex)
    self._after_regex = compile_regex(ChineseDateTime.MergedAfterRegex)
    # NOTE(review): compiled from the same MergedAfterRegex pattern as
    # _after_regex - confirm that sharing the pattern is intended.
    self._since_regex = compile_regex(ChineseDateTime.MergedAfterRegex)

    self._date_parser = ChineseDateParser()
    self._holiday_parser = ChineseHolidayParser()
    self._time_parser = ChineseTimeParser()
    self._date_time_parser = ChineseDateTimeParser()
    self._date_period_parser = ChineseDatePeriodParser()
    self._time_period_parser = ChineseTimePeriodParser()
    self._date_time_period_parser = ChineseDateTimePeriodParser()
    self._duration_parser = ChineseDurationParser()
    self._set_parser = ChineseSetParser()
def __init__(self):
    """Initialise the base parser, the number helpers and the date-period regexes."""
    super().__init__(ChineseDatePeriodParserConfiguration())

    self.integer_extractor = ChineseIntegerExtractor()
    self.number_parser = CJKNumberParser(ChineseNumberParserConfiguration())

    compile_regex = RegExpUtility.get_safe_reg_exp
    self.year_in_chinese_regex = compile_regex(
        ChineseDateTime.DatePeriodYearInChineseRegex)
    self.number_combined_with_unit_regex = compile_regex(
        ChineseDateTime.NumberCombinedWithUnit)
    self.unit_regex = compile_regex(ChineseDateTime.UnitRegex)
    self.year_and_month_regex = compile_regex(ChineseDateTime.YearAndMonth)
    self.pure_number_year_and_month_regex = compile_regex(
        ChineseDateTime.PureNumYearAndMonth)
    self.year_to_year_regex = compile_regex(ChineseDateTime.YearToYear)
    self.year_to_year_suffix_required = compile_regex(
        ChineseDateTime.YearToYearSuffixRequired)
    # Uses the same source pattern as year_in_chinese_regex above.
    self.chinese_year_regex = compile_regex(
        ChineseDateTime.DatePeriodYearInChineseRegex)
    self.season_with_year_regex = compile_regex(
        ChineseDateTime.SeasonWithYear)
def __init__(self):
    """Initialise the base configuration, then set the regexes and point extractors."""
    super().__init__()
    compile_regex = RegExpUtility.get_safe_reg_exp

    self._datetime_period_unit_regex = compile_regex(
        ChineseDateTime.DateTimePeriodUnitRegex)
    self._after_regex = compile_regex(ChineseDateTime.AfterRegex)
    self._before_regex = compile_regex(ChineseDateTime.BeforeRegex)

    self._date_point_extractor = ChineseDateExtractor()
    self._time_point_extractor = ChineseTimeExtractor()

    self._now_regex = compile_regex(ChineseDateTime.NowRegex)
    self._night_regex = compile_regex(ChineseDateTime.NightRegex)
    self._time_of_today_before_regex = compile_regex(
        ChineseDateTime.TimeOfTodayRegex)
    self._preposition_regex = compile_regex(ChineseDateTime.PrepositionRegex)

    # TODO: replace these None placeholders with the real regexes once the
    # corresponding properties are implemented.
    self._year_regex = None
    self._year_suffix = None
    self._suffix_after_regex = None
    self._date_number_connector_regex = None
    self._number_as_time_regex = None
def __init__(self):
    """Compile the regexes, create the extractors/parsers and null out unused properties."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    self._after_regex = compile_regex(ChineseDateTime.AfterRegex)
    self._before_regex = compile_regex(ChineseDateTime.BeforeRegex)
    self._specific_time_of_day_regex = compile_regex(
        ChineseDateTime.SpecificTimeOfDayRegex)
    self._time_of_day_regex = compile_regex(ChineseDateTime.TimeOfDayRegex)
    self._past_regex = compile_regex(ChineseDateTime.PastRegex)
    self._future_regex = compile_regex(ChineseDateTime.FutureRegex)
    # NOTE(review): compiled from the same TimeOfDayRegex pattern as
    # _time_of_day_regex - confirm that sharing the pattern is intended.
    self._relative_time_unit_regex = compile_regex(
        ChineseDateTime.TimeOfDayRegex)
    self._unit_map = ChineseDateTime.ParserConfigurationUnitMap

    self._date_extractor = ChineseDateExtractor()
    self._time_extractor = ChineseTimeExtractor()
    self._date_time_extractor = ChineseDateTimeExtractor()
    self._time_period_extractor = ChineseTimePeriodExtractor()

    self._date_parser = ChineseDateParser()
    self._time_parser = ChineseTimeParser()
    self._date_time_parser = ChineseDateTimeParser()
    self._time_period_parser = ChineseTimePeriodParser()

    # The remaining properties are explicitly left unset.
    self._check_both_before_after = None
    self._token_before_date = None
    self._prefix_day_regex = None
    self._am_desc_regex = None
    self._pm_desc_regex = None
    self._cardinal_extractor = None
    self._previous_prefix_regex = None
    self._within_next_prefix_regex = None
    self._future_suffix_regex = None
def __init__(self):
    """Compile the regexes and create the extractor/parser pairs this configuration holds."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    self._specific_time_of_day_regex = compile_regex(
        ChineseDateTime.SpecificTimeOfDayRegex)
    self._past_regex = compile_regex(ChineseDateTime.PastRegex)
    self._future_regex = compile_regex(ChineseDateTime.FutureRegex)
    # NOTE(review): built from TimeOfDayRegex rather than a dedicated
    # unit pattern - confirm that this is intended.
    self._relative_time_unit_regex = compile_regex(
        ChineseDateTime.TimeOfDayRegex)
    self._unit_map = ChineseDateTime.ParserConfigurationUnitMap

    self._date_extractor = ChineseDateExtractor()
    self._time_extractor = ChineseTimeExtractor()
    self._date_time_extractor = ChineseDateTimeExtractor()
    self._time_period_extractor = ChineseTimePeriodExtractor()

    self._date_parser = ChineseDateParser()
    self._time_parser = ChineseTimeParser()
    self._date_time_parser = ChineseDateTimeParser()
    self._time_period_parser = ChineseTimePeriodParser()
def __init__(self):
    """Initialise the base class without a configuration and wire up the time handlers."""
    super().__init__(None)

    # Matches a run of one or more digits.
    self.only_digit_match = RegExpUtility.get_safe_reg_exp('\\d+')
    self.numbers_map = ChineseDateTime.TimeNumberDictionary
    self.low_bound_map = ChineseDateTime.TimeLowBoundDesc

    # Dispatch table: one bound handler method per TimeType.
    handlers = {}
    handlers[TimeType.ChineseTime] = self.handle_chinese
    handlers[TimeType.DigitTime] = self.handle_digit
    handlers[TimeType.LessTime] = self.handle_less
    self.function_map = handlers

    self.inner_extractor = ChineseTimeExtractor()
def __init__(self):
    """Initialise the base configuration, then set the extractors and period regexes."""
    super().__init__()

    self._cardinal_extractor = ChineseCardinalExtractor()
    self._single_date_extractor = ChineseDateExtractor()
    self._single_time_extractor = ChineseTimeExtractor()
    self._single_date_time_extractor = ChineseDateTimeExtractor()

    compile_regex = RegExpUtility.get_safe_reg_exp
    self._preposition_regex = compile_regex(
        ChineseDateTime.DateTimePeriodPrepositionRegex)
    self._till_regex = compile_regex(ChineseDateTime.DateTimePeriodTillRegex)
    self._specific_time_of_day_regex = compile_regex(
        ChineseDateTime.SpecificTimeOfDayRegex)
    self._time_of_day_regex = compile_regex(ChineseDateTime.TimeOfDayRegex)
    self._followed_unit = compile_regex(
        ChineseDateTime.DateTimePeriodFollowedUnit)
    self._time_unit_regex = compile_regex(
        ChineseDateTime.DateTimePeriodUnitRegex)

    # TODO: replace these None placeholders with the real regexes once the
    # corresponding properties are implemented.
    self._suffix_regex = None
    self._after_regex = None
    self._before_regex = None
    self._prefix_day_regex = None
    self._pm_desc_regex = None
    self._am_desc_regex = None
    self._date_unit_regex = None
    self._future_suffix_regex = None
    self._within_next_prefix_regex = None
    self._token_before_date = None
def __init__(self):
    """Compile the modifier regexes and create one extractor per entity kind."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    # The equality pattern comes from the shared BaseDateTime definitions.
    self._equal_regex = compile_regex(BaseDateTime.EqualRegex)
    self._since_suffix_regex = compile_regex(
        ChineseDateTime.ParserConfigurationSinceSuffix)
    self._since_prefix_regex = compile_regex(
        ChineseDateTime.ParserConfigurationSincePrefix)
    self._until_regex = compile_regex(ChineseDateTime.ParserConfigurationUntil)
    self._after_regex = compile_regex(ChineseDateTime.AfterRegex)
    self._before_regex = compile_regex(ChineseDateTime.BeforeRegex)
    self._ambiguity_filters_dict = ChineseDateTime.AmbiguityFiltersDict

    self._date_extractor = ChineseDateExtractor()
    self._time_extractor = ChineseTimeExtractor()
    self._date_time_extractor = ChineseDateTimeExtractor()
    self._date_period_extractor = ChineseDatePeriodExtractor()
    self._time_period_extractor = ChineseTimePeriodExtractor()
    self._date_time_period_extractor = ChineseDateTimePeriodExtractor()
    holiday_config = ChineseHolidayExtractorConfiguration()
    self._holiday_extractor = BaseHolidayExtractor(holiday_config)
    self._duration_extractor = ChineseDurationExtractor()
    self._set_extractor = ChineseSetExtractor()

    # TODO: replace these None placeholders with the real values once the
    # corresponding properties are implemented.
    self._superfluous_word_matcher = None
    self._fail_fast_regex = None
    self._unspecified_date_period_regex = None
    self._suffix_after_regex = None
    self._potential_ambiguous_range_regex = None
    self._ambiguous_range_modifier_prefix = None
    self._around_regex = None
    self._term_filter_regexes = None
    self._datetime_alt_extractor = None
    self._time_zone_extractor = None
def __init__(self):
    """Compile the modifier regexes and create one parser per entity kind."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    # The equality pattern comes from the shared BaseDateTime definitions.
    self._equal_regex = compile_regex(BaseDateTime.EqualRegex)
    self._year_regex = compile_regex(ChineseDateTime.YearRegex)
    self._before_regex = compile_regex(ChineseDateTime.MergedBeforeRegex)
    self._after_regex = compile_regex(ChineseDateTime.MergedAfterRegex)
    # NOTE(review): compiled from the same MergedAfterRegex pattern as
    # _after_regex - confirm that sharing the pattern is intended.
    self._since_regex = compile_regex(ChineseDateTime.MergedAfterRegex)

    self._date_parser = ChineseDateParser()
    self._holiday_parser = ChineseHolidayParser()
    self._time_parser = ChineseTimeParser()
    self._date_time_parser = ChineseDateTimeParser()
    self._date_period_parser = ChineseDatePeriodParser()
    self._time_period_parser = ChineseTimePeriodParser()
    self._date_time_period_parser = ChineseDateTimePeriodParser()
    self._duration_parser = ChineseDurationParser()
    self._set_parser = ChineseSetParser()

    # TODO: replace these None placeholders with the real regexes once the
    # corresponding properties are implemented.
    self._around_regex = None
    self._suffix_after = None
def __init__(self):
    """Initialise the base parser, the part-of-day regexes and the cardinal helpers."""
    super().__init__(ChineseDateTimePeriodParserConfiguration())

    compile_regex = RegExpUtility.get_safe_reg_exp
    # Five regexes built from the DateTimePeriod MO/MI/AF/EV/NI patterns.
    self.tmo_regex = compile_regex(ChineseDateTime.DateTimePeriodMORegex)
    self.tmi_regex = compile_regex(ChineseDateTime.DateTimePeriodMIRegex)
    self.taf_regex = compile_regex(ChineseDateTime.DateTimePeriodAFRegex)
    self.tev_regex = compile_regex(ChineseDateTime.DateTimePeriodEVRegex)
    self.tni_regex = compile_regex(ChineseDateTime.DateTimePeriodNIRegex)
    self.unit_regex = compile_regex(ChineseDateTime.DateTimePeriodUnitRegex)
    self.time_of_day_regex = compile_regex(ChineseDateTime.TimeOfDayRegex)

    self.cardinal_extractor = ChineseCardinalExtractor()
    number_config = ChineseNumberParserConfiguration()
    self.cardinal_parser = CJKNumberParser(number_config)
def __init__(self):
    """Create the single-entity extractors and compile the period regexes."""
    self._cardinal_extractor = ChineseCardinalExtractor()
    self._single_date_extractor = ChineseDateExtractor()
    self._single_time_extractor = ChineseTimeExtractor()
    self._single_date_time_extractor = ChineseDateTimeExtractor()

    compile_regex = RegExpUtility.get_safe_reg_exp
    self._preposition_regex = compile_regex(
        ChineseDateTime.DateTimePeriodPrepositionRegex)
    self._till_regex = compile_regex(ChineseDateTime.DateTimePeriodTillRegex)
    self._specific_time_of_day_regex = compile_regex(
        ChineseDateTime.SpecificTimeOfDayRegex)
    self._time_of_day_regex = compile_regex(ChineseDateTime.TimeOfDayRegex)
    self._followed_unit = compile_regex(
        ChineseDateTime.DateTimePeriodFollowedUnit)
    self._time_unit_regex = compile_regex(
        ChineseDateTime.DateTimePeriodUnitRegex)
def __init__(self):
    """Compile the three holiday patterns into ``_holiday_regexes``, in declaration order."""
    holiday_patterns = (
        ChineseDateTime.HolidayRegexList1,
        ChineseDateTime.HolidayRegexList2,
        ChineseDateTime.LunarHolidayRegex,
    )
    self._holiday_regexes = [
        RegExpUtility.get_safe_reg_exp(pattern)
        for pattern in holiday_patterns
    ]
def __init__(self):
    """Set the regexes, helper extractors/parsers and lookup maps of this configuration."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    self._complex_dateperiod_regex = None
    self._relative_decade_regex = None
    self._relative_regex = compile_regex(ChineseDateTime.RelativeRegex)

    self._date_extractor = ChineseDateExtractor()
    self._date_parser = ChineseDateParser()
    self._duration_extractor = ChineseDurationExtractor()

    self._simple_cases_regex = compile_regex(ChineseDateTime.SimpleCasesRegex)
    self._one_word_period_regex = compile_regex(
        ChineseDateTime.OneWordPeriodRegex)
    self._year_regex = compile_regex(ChineseDateTime.YearRegex)
    self._past_regex = compile_regex(ChineseDateTime.PastRegex)
    self._future_regex = compile_regex(ChineseDateTime.FutureRegex)
    self._week_of_month_regex = compile_regex(
        ChineseDateTime.WeekOfMonthRegex)
    self._quarter_regex = compile_regex(ChineseDateTime.QuarterRegex)
    self._season_regex = compile_regex(ChineseDateTime.SeasonRegex)
    self._next_prefix_regex = compile_regex(
        ChineseDateTime.DatePeriodNextRegex)
    self._past_prefix_regex = compile_regex(
        ChineseDateTime.DatePeriodLastRegex)
    self._this_prefix_regex = compile_regex(
        ChineseDateTime.DatePeriodThisRegex)

    # These two have no dedicated resource pattern; they are compiled from
    # the literal r'\0'.
    self._later_early_period_regex = compile_regex(r'\0')
    self._week_with_week_day_range_regex = compile_regex(r'\0')

    self._token_before_date = ' on '

    self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
    self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
    self._cardinal_map = ChineseDateTime.ParserConfigurationCardinalMap
    self._season_map = ChineseDateTime.ParserConfigurationSeasonMap
    self._unit_map = ChineseDateTime.ParserConfigurationUnitMap
    self._now_regex = compile_regex(ChineseDateTime.NowRegex)

    # TODO: replace the placeholder and None values below with the real
    # regexes once the corresponding properties are implemented.
    self._reference_date_period_regex = compile_regex(r'\0')
    self._decade_with_century_regex = None
    self._later_regex = None
    self._ago_regex = None
def __init__(self):
    """Build the ordered date regex list plus the maps, regexes and number helpers."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    date_patterns = [
        ChineseDateTime.DateRegexList1,
        ChineseDateTime.DateRegexList2,
        ChineseDateTime.DateRegexList3,
        ChineseDateTime.DateRegexList4,
        ChineseDateTime.DateRegexList5,
    ]
    # Patterns 6 and 7 swap places depending on the default language
    # fallback: pattern 7 goes first for the DMY fallback.
    dmy_fallback = (ChineseDateTime.DefaultLanguageFallback
                    == Constants.DEFAULT_LANGUAGE_FALLBACK_DMY)
    if dmy_fallback:
        date_patterns += [ChineseDateTime.DateRegexList7,
                          ChineseDateTime.DateRegexList6]
    else:
        date_patterns += [ChineseDateTime.DateRegexList6,
                          ChineseDateTime.DateRegexList7]
    date_patterns.append(ChineseDateTime.DateRegexList8)
    self._date_regex = [compile_regex(pattern) for pattern in date_patterns]

    self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
    self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
    self._day_of_week = ChineseDateTime.ParserConfigurationDayOfWeek

    self._special_day_regex = compile_regex(ChineseDateTime.SpecialDayRegex)
    self._special_day_with_num_regex = compile_regex(
        ChineseDateTime.SpecialDayWithNumRegex)
    self._this_regex = compile_regex(ChineseDateTime.DateThisRegex)
    self._next_regex = compile_regex(ChineseDateTime.DateNextRegex)
    self._last_regex = compile_regex(ChineseDateTime.DateLastRegex)
    self._unit_regex = compile_regex(ChineseDateTime.DateUnitRegex)
    self._unit_map = ChineseDateTime.ParserConfigurationUnitMap
    self._week_day_of_month_regex = compile_regex(
        ChineseDateTime.WeekDayOfMonthRegex)
    self._week_day_regex = compile_regex(ChineseDateTime.WeekDayRegex)

    self._integer_extractor = ChineseIntegerExtractor()
    number_config = ChineseNumberParserConfiguration()
    self._number_parser = CJKNumberParser(number_config)
class ChineseDateTimeExtractor(BaseDateTimeExtractor):
    """Date-time extractor merging date+time pairs, regex matches and relative durations.

    The three class-level regexes are compiled once when the class is defined.
    """

    before_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.BeforeRegex)
    after_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.AfterRegex)
    date_time_period_unit_regex = RegExpUtility.get_safe_reg_exp(
        ChineseDateTime.DateTimePeriodUnitRegex)

    def __init__(self):
        """Initialise the base extractor and create the duration extractor."""
        super().__init__(ChineseDateTimeExtractorConfiguration())
        self.duration_extractor = ChineseDurationExtractor()

    def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]:
        """Return the merged extraction results found in ``source``.

        ``reference`` defaults to ``datetime.now()`` when not supplied.
        """
        if reference is None:
            reference = datetime.now()

        tokens: List[Token] = list()
        tokens.extend(self.merge_date_and_time(source, reference))
        tokens.extend(self.basic_regex_match(source))
        tokens.extend(self.time_of_today(source, reference))
        tokens.extend(self.duration_with_ago_and_later(source, reference))

        return merge_all_tokens(tokens, source, self.extractor_type_name)

    def merge_date_and_time(self, source: str, reference: datetime) -> List[Token]:
        """Return tokens spanning a date followed by a time joined by a connector.

        Date and time results are sorted by start offset. A date is paired
        with the first following result that does not overlap it; when that
        result is a time and the text between them is a connector token, one
        token covering both is emitted.
        """
        tokens: List[Token] = list()

        ers: List[ExtractResult] = self.config.date_point_extractor.extract(
            source, reference)
        if not ers:
            return tokens

        ers.extend(self.config.time_point_extractor.extract(source, reference))
        if len(ers) < 2:
            return tokens

        ers = sorted(ers, key=lambda x: x.start)

        i = 0
        while i < len(ers) - 1:
            # Find the first result after i that does not overlap it.
            j = i + 1
            while j < len(ers) and ers[i].overlap(ers[j]):
                j += 1
            if j >= len(ers):
                break

            # Compare type names by value: identity (`is`) only works when
            # both sides happen to be the very same string object.
            if (ers[i].type == Constants.SYS_DATETIME_DATE
                    and ers[j].type == Constants.SYS_DATETIME_TIME):
                middle_begin = ers[i].start + ers[i].length
                middle_end = ers[j].start
                if middle_begin > middle_end:
                    # Advance past this pair before skipping it; a bare
                    # `continue` here would leave i unchanged and loop forever.
                    i = j + 1
                    continue

                middle = source[middle_begin:middle_end].strip().lower()
                if self.config.is_connector_token(middle):
                    begin = ers[i].start
                    end = ers[j].start + ers[j].length
                    tokens.append(Token(begin, end))

                i = j + 1
                continue

            i = j

        return tokens

    def time_of_today(self, source: str, reference: datetime) -> List[Token]:
        """Return tokens for a time preceded by a ``time_of_today_before_regex`` match.

        The text before each extracted time is searched. When the time text
        itself starts with a ``night_regex`` match, that leading match is
        included in the searched prefix. A token is emitted only when nothing
        but whitespace follows the match inside the prefix.
        """
        tokens: List[Token] = list()
        ers = self.config.time_point_extractor.extract(source, reference)

        for er in ers:
            before = source[:er.start]

            inner_match = regex.search(self.config.night_regex, er.text)
            if inner_match is not None and inner_match.start() == 0:
                before = source[:er.start + len(inner_match.group())]

            if not before:
                continue

            match = regex.search(
                self.config.time_of_today_before_regex, before)
            if match is not None and not before[match.end():].strip():
                begin = match.start()
                end = er.start + er.length
                tokens.append(Token(begin, end))

        return tokens

    def duration_with_ago_and_later(self, source: str, reference: datetime) -> List[Token]:
        """Return tokens for durations directly followed by a before/after character.

        Each token spans the duration plus the one following character and is
        flagged ``is_duration_with_ago_and_later``.
        """
        ret: List[Token] = list()
        duration_er = self.duration_extractor.extract(source, reference)

        for er in duration_er:
            pos = er.start + er.length
            if pos >= len(source):
                # Nothing follows the duration, so no marker can be present.
                continue

            # Only the single character right after the duration is examined.
            suffix = source[pos]
            before_match = RegExpUtility.get_matches(self.before_regex, suffix)
            after_match = RegExpUtility.get_matches(self.after_regex, suffix)

            if (before_match and suffix.startswith(before_match[0])) \
                    or (after_match and suffix.startswith(after_match[0])):
                meta_data = MetaData()
                meta_data.is_duration_with_ago_and_later = True
                ret.append(Token(er.start, pos + 1, meta_data))

        return ret
def __init__(self):
    """Compile the date-period regexes and create the helper extractors/parsers.

    Properties that have no implementation yet are set to None.
    """
    compile_regex = RegExpUtility.get_safe_reg_exp

    self._season_regex = compile_regex(ChineseDateTime.SeasonRegex)
    self._month_suffix_regex = compile_regex(ChineseDateTime.MonthSuffixRegex)
    self._year_regex_in_number = compile_regex(
        ChineseDateTime.YearRegexInNumber)
    self._strict_year_regex = compile_regex(ChineseDateTime.StrictYearRegex)
    self._last_regex = compile_regex(ChineseDateTime.DatePeriodLastRegex)
    self._next_regex = compile_regex(ChineseDateTime.DatePeriodNextRegex)
    self._this_regex = compile_regex(ChineseDateTime.DatePeriodThisRegex)
    self._month_regex = compile_regex(ChineseDateTime.MonthRegex)
    self._zero_to_nine_integer_regex_chinese = compile_regex(
        ChineseDateTime.ZeroToNineIntegerRegexCJK)
    self._relative_month_regex = compile_regex(
        ChineseDateTime.RelativeMonthRegex)
    self._day_regex_in_chinese = compile_regex(
        ChineseDateTime.DatePeriodDayRegexInCJK)
    self._day_regex = compile_regex(ChineseDateTime.DayRegex)

    # The list order is kept exactly as declared.
    self._simple_cases_regexes = [
        compile_regex(ChineseDateTime.SimpleCasesRegex),
        compile_regex(ChineseDateTime.OneWordPeriodRegex),
        compile_regex(ChineseDateTime.StrictYearRegex),
        compile_regex(ChineseDateTime.YearToYear),
        compile_regex(ChineseDateTime.YearToYearSuffixRequired),
        compile_regex(ChineseDateTime.MonthToMonth),
        compile_regex(ChineseDateTime.MonthToMonthSuffixRequired),
        compile_regex(ChineseDateTime.YearAndMonth),
        compile_regex(ChineseDateTime.PureNumYearAndMonth),
        compile_regex(ChineseDateTime.DatePeriodYearInCJKRegex),
        compile_regex(ChineseDateTime.WeekOfMonthRegex),
        compile_regex(ChineseDateTime.SeasonWithYear),
        compile_regex(ChineseDateTime.QuarterRegex),
        compile_regex(ChineseDateTime.DecadeRegex),
    ]

    # The illegal-year pattern comes from the shared BaseDateTime definitions.
    self._illegal_year_regex = compile_regex(BaseDateTime.IllegalYearRegex)
    self._year_regex = compile_regex(ChineseDateTime.YearRegex)
    self._till_regex = compile_regex(ChineseDateTime.DatePeriodTillRegex)
    self._followed_unit = compile_regex(ChineseDateTime.FollowedUnit)
    self._number_combined_with_unit = compile_regex(
        ChineseDateTime.NumberCombinedWithUnit)
    self._past_regex = compile_regex(ChineseDateTime.PastRegex)
    self._future_regex = compile_regex(ChineseDateTime.FutureRegex)

    self._date_point_extractor = ChineseDateExtractor()
    self._integer_extractor = ChineseNumberExtractor()
    self._number_parser = BaseNumberParser(
        ChineseNumberParserConfiguration())

    self._now_regex = compile_regex(ChineseDateTime.NowRegex)
    self._month_num_regex = compile_regex(ChineseDateTime.MonthNumRegex)
    self._cardinal_extractor = ChineseCardinalExtractor()
    self._ordinal_extractor = ChineseOrdinalExtractor()

    # TODO: replace these None placeholders with the real regexes once the
    # corresponding properties are implemented.
    # _previous_prefix_regex used to be assigned None twice in this block;
    # the redundant second assignment has been dropped.
    self._previous_prefix_regex = None
    self._check_both_before_after = None
    self._century_suffix_regex = None
    self._year_period_regex = None
    self._duration_date_restrictions = None
    self._more_than_regex = None
    self._less_than_regex = None
    self._later_regex = None
    self._ago_regex = None
    self._future_suffix_regex = None
    self._within_next_prefix_regex = None
    self._time_unit_regex = None
def __init__(self, options: DateTimeOptions):
    """Initialise the base class with the Chinese merged configuration and ``options``.

    Also compiles ``day_of_month_regex`` with the ``regex.I`` flag.
    """
    merged_config = ChineseMergedExtractorConfiguration()
    super().__init__(merged_config, options)

    # Anchored at the start: one or two digits followed by the character 号.
    day_pattern = '^\\d{1,2}号'
    self.day_of_month_regex = RegExpUtility.get_safe_reg_exp(
        day_pattern, regex.I)
def __init__(self, regex_true, regex_false, token_regex, only_top_match):
    """Compile the three supplied patterns and store the ``only_top_match`` flag.

    Args:
        regex_true: pattern compiled into ``self.regex_true``.
        regex_false: pattern compiled into ``self.regex_false``.
        token_regex: pattern compiled into ``self.token_regex``.
        only_top_match: value stored unchanged on the instance.
    """
    compile_regex = RegExpUtility.get_safe_reg_exp
    self.regex_true = compile_regex(regex_true)
    self.regex_false = compile_regex(regex_false)
    self.token_regex = compile_regex(token_regex)
    self.only_top_match = only_top_match
def __init__(self):
    """Compile the date-period regexes and create the date/number helpers."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    # The list order is kept exactly as declared.
    simple_case_patterns = (
        ChineseDateTime.SimpleCasesRegex,
        ChineseDateTime.OneWordPeriodRegex,
        ChineseDateTime.StrictYearRegex,
        ChineseDateTime.YearToYear,
        ChineseDateTime.YearToYearSuffixRequired,
        ChineseDateTime.YearAndMonth,
        ChineseDateTime.PureNumYearAndMonth,
        ChineseDateTime.DatePeriodYearInChineseRegex,
        ChineseDateTime.WeekOfMonthRegex,
        ChineseDateTime.SeasonWithYear,
        ChineseDateTime.QuarterRegex,
    )
    self._simple_cases_regexes = [
        compile_regex(pattern) for pattern in simple_case_patterns
    ]

    # The illegal-year pattern comes from the shared BaseDateTime definitions.
    self._illegal_year_regex = compile_regex(BaseDateTime.IllegalYearRegex)
    self._year_regex = compile_regex(ChineseDateTime.YearRegex)
    self._till_regex = compile_regex(ChineseDateTime.DatePeriodTillRegex)
    self._followed_unit = compile_regex(ChineseDateTime.FollowedUnit)
    self._number_combined_with_unit = compile_regex(
        ChineseDateTime.NumberCombinedWithUnit)
    self._past_regex = compile_regex(ChineseDateTime.PastRegex)
    self._future_regex = compile_regex(ChineseDateTime.FutureRegex)

    self._date_point_extractor = ChineseDateExtractor()
    self._integer_extractor = ChineseNumberExtractor()
    number_config = ChineseNumberParserConfiguration()
    self._number_parser = BaseNumberParser(number_config)
def __init__(self):
    """Build the ordered explicit-date regex list and the implicit-date regex list."""
    compile_regex = RegExpUtility.get_safe_reg_exp

    date_patterns = [
        ChineseDateTime.DateRegexList1,
        ChineseDateTime.DateRegexList2,
        ChineseDateTime.DateRegexList3,
        ChineseDateTime.DateRegexList4,
        ChineseDateTime.DateRegexList5,
    ]
    # Patterns 6 and 7 swap places depending on the default language
    # fallback: pattern 7 goes first for the DMY fallback.
    dmy_fallback = (ChineseDateTime.DefaultLanguageFallback
                    == Constants.DEFAULT_LANGUAGE_FALLBACK_DMY)
    if dmy_fallback:
        date_patterns += [ChineseDateTime.DateRegexList7,
                          ChineseDateTime.DateRegexList6]
    else:
        date_patterns += [ChineseDateTime.DateRegexList6,
                          ChineseDateTime.DateRegexList7]
    date_patterns.append(ChineseDateTime.DateRegexList8)
    self._date_regex_list = [
        compile_regex(pattern) for pattern in date_patterns
    ]

    implicit_patterns = (
        ChineseDateTime.LunarRegex,
        ChineseDateTime.SpecialDayRegex,
        ChineseDateTime.DateThisRegex,
        ChineseDateTime.DateLastRegex,
        ChineseDateTime.DateNextRegex,
        ChineseDateTime.WeekDayRegex,
        ChineseDateTime.WeekDayOfMonthRegex,
        ChineseDateTime.SpecialDate,
    )
    self._implicit_date_list = [
        compile_regex(pattern) for pattern in implicit_patterns
    ]