Beispiel #1
0
 def __init__(self):
     """Hand the base class a mapping from time-period pattern to its type.

     Two patterns are built from ``ChineseDateTime`` via
     ``RegExpUtility.get_safe_reg_exp``; each is keyed to the
     ``TimePeriodType`` member it stands for.
     """
     make_regex = RegExpUtility.get_safe_reg_exp
     super().__init__({
         make_regex(ChineseDateTime.TimePeriodRegexes1):
             TimePeriodType.FullTime,
         make_regex(ChineseDateTime.TimePeriodRegexes2):
             TimePeriodType.ShortTime,
     })
 def __init__(self):
     """Initialise the base class with the Chinese time-period parser
     configuration and attach the regexes and lookup tables used here.
     """
     super().__init__(ChineseTimePeriodParserConfiguration())
     make_regex = RegExpUtility.get_safe_reg_exp

     # Regex objects.
     self.day_description_regex = make_regex(ChineseDateTime.TimeDayDescRegex)
     self.only_digit_match = make_regex(r'\d+')

     # Lookup tables taken directly from the resource class.
     self.numbers_map = ChineseDateTime.TimeNumberDictionary
     self.low_bound_map = ChineseDateTime.TimeLowBoundDesc
 def __init__(self):
     """Initialise the base class with the Chinese date-time-period
     extractor configuration and build three additional regexes.
     """
     super().__init__(ChineseDateTimePeriodExtractorConfiguration())
     make_regex = RegExpUtility.get_safe_reg_exp
     self.zhijian_regex = make_regex(ChineseDateTime.ZhijianRegex)
     self.past_regex = make_regex(ChineseDateTime.PastRegex)
     self.future_regex = make_regex(ChineseDateTime.FutureRegex)
 def __init__(self):
     """Initialise the base class with ``None`` and create the inner
     number-with-unit extractor plus the two duration regexes.
     """
     super().__init__(None)
     duration_config = ChineseDurationExtractorConfiguration()
     self.extractor = NumberWithUnitExtractor(duration_config)

     make_regex = RegExpUtility.get_safe_reg_exp
     self.year_regex = make_regex(ChineseDateTime.DurationYearRegex)
     self.half_suffix_regex = make_regex(
         ChineseDateTime.DurationHalfSuffixRegex)
Beispiel #5
0
class ChineseDateExtractor(BaseDateExtractor):
    """Date extractor configured with ``ChineseDateExtractorConfiguration``.

    Candidate tokens come from three sources (the configured date regexes, the
    configured implicit-date regexes, and durations immediately followed by a
    before/after marker) and are merged with ``merge_all_tokens``.
    """

    before_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.BeforeRegex)
    after_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.AfterRegex)
    date_time_period_unit_regex = RegExpUtility.get_safe_reg_exp(
        ChineseDateTime.DateTimePeriodUnitRegex)

    def __init__(self):
        """Set up the base extractor and the duration extractor used by
        ``relative_duration_date``."""
        super().__init__(ChineseDateExtractorConfiguration())
        self.duration_extractor = ChineseDurationExtractor()

    def extract(self, source: str, reference: datetime = None) -> List[ExtractResult]:
        """Return the merged extraction results for ``source``.

        ``reference`` defaults to ``datetime.now()`` when it is ``None``.
        """
        if reference is None:
            reference = datetime.now()

        candidates = self.basic_regex_match(source)
        candidates += self.implicit_date(source)
        candidates += self.relative_duration_date(source, reference)

        return merge_all_tokens(candidates, source, self.extractor_type_name)

    def relative_duration_date(self, source: str, reference: datetime) -> List[Token]:
        """Return tokens for durations directly followed by a before/after marker.

        Durations whose text matches ``date_time_period_unit_regex`` are
        skipped. For the rest, only the single character right after the
        duration is inspected; on a hit the token spans the duration plus that
        one character and carries metadata flagged as
        ``is_duration_with_ago_and_later``.
        """
        found: List[Token] = []

        for duration in self.duration_extractor.extract(source, reference):
            if regex.search(self.date_time_period_unit_regex, duration.text):
                continue

            end_pos = duration.start + duration.length
            if end_pos >= len(source):
                # Nothing follows the duration, so there is no marker to check.
                continue

            next_char = source[end_pos]
            before_hits = RegExpUtility.get_matches(self.before_regex, next_char)
            after_hits = RegExpUtility.get_matches(self.after_regex, next_char)

            if not ((before_hits and next_char.startswith(before_hits[0]))
                    or (after_hits and next_char.startswith(after_hits[0]))):
                continue

            meta_data = MetaData()
            meta_data.is_duration_with_ago_and_later = True
            found.append(Token(duration.start, end_pos + 1, meta_data))

        return found

    def basic_regex_match(self, source: str) -> List[Token]:
        """Collect tokens for every regex in ``config.date_regex_list``."""
        return [
            token
            for pattern in self.config.date_regex_list
            for token in get_tokens_from_regex(pattern, source)
        ]

    def implicit_date(self, source: str) -> List[Token]:
        """Collect tokens for every regex in ``config.implicit_date_list``."""
        return [
            token
            for pattern in self.config.implicit_date_list
            for token in get_tokens_from_regex(pattern, source)
        ]
Beispiel #6
0
 def __init__(self):
     """Hand the base class a mapping from time pattern to its ``TimeType``.

     The three patterns are built from ``ChineseDateTime`` via
     ``RegExpUtility.get_safe_reg_exp``.
     """
     make_regex = RegExpUtility.get_safe_reg_exp
     super().__init__({
         make_regex(ChineseDateTime.TimeRegexes1): TimeType.ChineseTime,
         make_regex(ChineseDateTime.TimeRegexes2): TimeType.DigitTime,
         make_regex(ChineseDateTime.TimeRegexes3): TimeType.LessTime,
     })
Beispiel #7
0
 def __init__(self):
     """Initialise the base class for the Chinese culture and populate the
     duration-related regexes, extract type and unit tables.
     """
     super().__init__(CultureInfo(Culture.Chinese))
     make_regex = RegExpUtility.get_safe_reg_exp

     self._year_regex = make_regex(ChineseDateTime.DurationYearRegex)
     self._half_suffix_regex = make_regex(
         ChineseDateTime.DurationHalfSuffixRegex)
     self._extract_type = Constants.SYS_DATETIME_DURATION
     self._suffix_list = ChineseDateTime.DurationSuffixList
     # No prefixes are defined here; an empty mapping is used.
     self._prefix_list = {}
     self._ambiguous_unit_list = ChineseDateTime.DurationAmbiguousUnits
 def __init__(self):
     """Create the date/time point extractors and the regexes stored on
     this configuration object.
     """
     make_regex = RegExpUtility.get_safe_reg_exp

     self._date_point_extractor = ChineseDateExtractor()
     self._time_point_extractor = ChineseTimeExtractor()
     self._now_regex = make_regex(ChineseDateTime.NowRegex)
     self._night_regex = make_regex(ChineseDateTime.NightRegex)
     self._time_of_today_before_regex = make_regex(
         ChineseDateTime.TimeOfTodayRegex)
     self._preposition_regex = make_regex(ChineseDateTime.PrepositionRegex)
Beispiel #9
0
 def __init__(self):
     """Initialise the base class with the Chinese date parser
     configuration and attach extra regexes and the per-month day limits.
     """
     super().__init__(ChineseDateParserConfiguration())
     make_regex = RegExpUtility.get_safe_reg_exp

     self.lunar_regex = make_regex(ChineseDateTime.LunarRegex)
     self.special_date_regex = make_regex(ChineseDateTime.SpecialDate)
     self.token_next_regex = make_regex(ChineseDateTime.DateNextRe)
     self.token_last_regex = make_regex(ChineseDateTime.DateLastRe)
     # One entry per month, January first; February is listed as 29.
     self.month_max_days: List[int] = [
         31, 29, 31, 30, 31, 30,
         31, 31, 30, 31, 30, 31,
     ]
Beispiel #10
0
 def __init__(self):
     """Build the set-related regexes and create the sub-extractors."""
     make_regex = RegExpUtility.get_safe_reg_exp

     self._last_regex = make_regex(ChineseDateTime.SetLastRegex)
     self._each_prefix_regex = make_regex(ChineseDateTime.SetEachPrefixRegex)
     self._each_unit_regex = make_regex(ChineseDateTime.SetEachUnitRegex)
     self._each_day_regex = make_regex(ChineseDateTime.SetEachDayRegex)
     # Built from the same SetEachDayRegex pattern as _each_day_regex.
     self._before_each_day_regex = make_regex(ChineseDateTime.SetEachDayRegex)

     self._duration_extractor = ChineseDurationExtractor()
     self._time_extractor = ChineseTimeExtractor()
     self._date_extractor = ChineseDateExtractor()
     self._date_time_extractor = ChineseDateTimeExtractor()
Beispiel #11
0
 def __init__(self):
     """Build the before/after/since regexes and create one parser per
     date-time entity kind.
     """
     make_regex = RegExpUtility.get_safe_reg_exp

     self._before_regex = make_regex(ChineseDateTime.MergedBeforeRegex)
     self._after_regex = make_regex(ChineseDateTime.MergedAfterRegex)
     # Built from the same MergedAfterRegex pattern as _after_regex.
     self._since_regex = make_regex(ChineseDateTime.MergedAfterRegex)

     self._date_parser = ChineseDateParser()
     self._holiday_parser = ChineseHolidayParser()
     self._time_parser = ChineseTimeParser()
     self._date_time_parser = ChineseDateTimeParser()
     self._date_period_parser = ChineseDatePeriodParser()
     self._time_period_parser = ChineseTimePeriodParser()
     self._date_time_period_parser = ChineseDateTimePeriodParser()
     self._duration_parser = ChineseDurationParser()
     self._set_parser = ChineseSetParser()
 def __init__(self):
     """Initialise the base class with the Chinese date-period parser
     configuration, then attach the number helpers and extra regexes.
     """
     super().__init__(ChineseDatePeriodParserConfiguration())
     self.integer_extractor = ChineseIntegerExtractor()
     self.number_parser = CJKNumberParser(ChineseNumberParserConfiguration())

     make_regex = RegExpUtility.get_safe_reg_exp
     self.year_in_chinese_regex = make_regex(
         ChineseDateTime.DatePeriodYearInChineseRegex)
     self.number_combined_with_unit_regex = make_regex(
         ChineseDateTime.NumberCombinedWithUnit)
     self.unit_regex = make_regex(ChineseDateTime.UnitRegex)
     self.year_and_month_regex = make_regex(ChineseDateTime.YearAndMonth)
     self.pure_number_year_and_month_regex = make_regex(
         ChineseDateTime.PureNumYearAndMonth)
     self.year_to_year_regex = make_regex(ChineseDateTime.YearToYear)
     self.year_to_year_suffix_required = make_regex(
         ChineseDateTime.YearToYearSuffixRequired)
     # Built from the same pattern as year_in_chinese_regex.
     self.chinese_year_regex = make_regex(
         ChineseDateTime.DatePeriodYearInChineseRegex)
     self.season_with_year_regex = make_regex(ChineseDateTime.SeasonWithYear)
Beispiel #13
0
 def __init__(self):
     """Initialise the base class, then set the regexes and point
     extractors; attributes without an implementation yet stay ``None``.
     """
     super().__init__()
     make_regex = RegExpUtility.get_safe_reg_exp

     self._datetime_period_unit_regex = make_regex(
         ChineseDateTime.DateTimePeriodUnitRegex)
     self._after_regex = make_regex(ChineseDateTime.AfterRegex)
     self._before_regex = make_regex(ChineseDateTime.BeforeRegex)

     self._date_point_extractor = ChineseDateExtractor()
     self._time_point_extractor = ChineseTimeExtractor()

     self._now_regex = make_regex(ChineseDateTime.NowRegex)
     self._night_regex = make_regex(ChineseDateTime.NightRegex)
     self._time_of_today_before_regex = make_regex(
         ChineseDateTime.TimeOfTodayRegex)
     self._preposition_regex = make_regex(ChineseDateTime.PrepositionRegex)

     # TODO When the implementation for these properties is added, change the None values to their respective Regexps
     self._year_regex = None
     self._year_suffix = None
     self._suffix_after_regex = None
     self._date_number_connector_regex = None
     self._number_as_time_regex = None
 def __init__(self):
     """Build the regexes, unit map, extractors and parsers stored on this
     configuration object; the remaining attributes are set to ``None``.
     """
     make_regex = RegExpUtility.get_safe_reg_exp

     # Regex objects.
     self._after_regex = make_regex(ChineseDateTime.AfterRegex)
     self._before_regex = make_regex(ChineseDateTime.BeforeRegex)
     self._specific_time_of_day_regex = make_regex(
         ChineseDateTime.SpecificTimeOfDayRegex)
     self._time_of_day_regex = make_regex(ChineseDateTime.TimeOfDayRegex)
     self._past_regex = make_regex(ChineseDateTime.PastRegex)
     self._future_regex = make_regex(ChineseDateTime.FutureRegex)
     # Built from the same TimeOfDayRegex pattern as _time_of_day_regex.
     self._relative_time_unit_regex = make_regex(
         ChineseDateTime.TimeOfDayRegex)
     self._unit_map = ChineseDateTime.ParserConfigurationUnitMap

     # Extractors.
     self._date_extractor = ChineseDateExtractor()
     self._time_extractor = ChineseTimeExtractor()
     self._date_time_extractor = ChineseDateTimeExtractor()
     self._time_period_extractor = ChineseTimePeriodExtractor()

     # Parsers.
     self._date_parser = ChineseDateParser()
     self._time_parser = ChineseTimeParser()
     self._date_time_parser = ChineseDateTimeParser()
     self._time_period_parser = ChineseTimePeriodParser()

     # Attributes with no value assigned here.
     self._check_both_before_after = None
     self._token_before_date = None
     self._prefix_day_regex = None
     self._am_desc_regex = None
     self._pm_desc_regex = None
     self._cardinal_extractor = None
     self._previous_prefix_regex = None
     self._within_next_prefix_regex = None
     self._future_suffix_regex = None
 def __init__(self):
     """Build the regexes, unit map, extractors and parsers stored on this
     configuration object.
     """
     make_regex = RegExpUtility.get_safe_reg_exp

     # Regex objects.
     self._specific_time_of_day_regex = make_regex(
         ChineseDateTime.SpecificTimeOfDayRegex)
     self._past_regex = make_regex(ChineseDateTime.PastRegex)
     self._future_regex = make_regex(ChineseDateTime.FutureRegex)
     self._relative_time_unit_regex = make_regex(
         ChineseDateTime.TimeOfDayRegex)
     self._unit_map = ChineseDateTime.ParserConfigurationUnitMap

     # Extractors.
     self._date_extractor = ChineseDateExtractor()
     self._time_extractor = ChineseTimeExtractor()
     self._date_time_extractor = ChineseDateTimeExtractor()
     self._time_period_extractor = ChineseTimePeriodExtractor()

     # Parsers.
     self._date_parser = ChineseDateParser()
     self._time_parser = ChineseTimeParser()
     self._date_time_parser = ChineseDateTimeParser()
     self._time_period_parser = ChineseTimePeriodParser()
Beispiel #16
0
 def __init__(self):
     """Initialise the base class with ``None`` and set up the digit regex,
     lookup tables, per-``TimeType`` handler table and inner extractor.
     """
     super().__init__(None)

     self.only_digit_match = RegExpUtility.get_safe_reg_exp('\\d+')
     self.numbers_map = ChineseDateTime.TimeNumberDictionary
     self.low_bound_map = ChineseDateTime.TimeLowBoundDesc

     # Dispatch table: each time type maps to the bound method handling it.
     handlers = {}
     handlers[TimeType.ChineseTime] = self.handle_chinese
     handlers[TimeType.DigitTime] = self.handle_digit
     handlers[TimeType.LessTime] = self.handle_less
     self.function_map = handlers

     self.inner_extractor = ChineseTimeExtractor()
Beispiel #17
0
 def __init__(self):
     """Initialise the base class, then set the extractors and regexes;
     attributes without an implementation yet stay ``None``.
     """
     super().__init__()

     self._cardinal_extractor = ChineseCardinalExtractor()
     self._single_date_extractor = ChineseDateExtractor()
     self._single_time_extractor = ChineseTimeExtractor()
     self._single_date_time_extractor = ChineseDateTimeExtractor()

     make_regex = RegExpUtility.get_safe_reg_exp
     self._preposition_regex = make_regex(
         ChineseDateTime.DateTimePeriodPrepositionRegex)
     self._till_regex = make_regex(ChineseDateTime.DateTimePeriodTillRegex)
     self._specific_time_of_day_regex = make_regex(
         ChineseDateTime.SpecificTimeOfDayRegex)
     self._time_of_day_regex = make_regex(ChineseDateTime.TimeOfDayRegex)
     self._followed_unit = make_regex(
         ChineseDateTime.DateTimePeriodFollowedUnit)
     self._time_unit_regex = make_regex(
         ChineseDateTime.DateTimePeriodUnitRegex)

     # TODO When the implementation for these properties is added, change the None values to their respective Regexps
     self._suffix_regex = None
     self._after_regex = None
     self._before_regex = None
     self._prefix_day_regex = None
     self._pm_desc_regex = None
     self._am_desc_regex = None
     self._date_unit_regex = None
     self._future_suffix_regex = None
     self._within_next_prefix_regex = None
     self._token_before_date = None
Beispiel #18
0
 def __init__(self):
     """Build the regexes, ambiguity filters and one extractor per entity
     kind; attributes without an implementation yet stay ``None``.
     """
     make_regex = RegExpUtility.get_safe_reg_exp

     # Regex objects.
     self._equal_regex = make_regex(BaseDateTime.EqualRegex)
     self._since_suffix_regex = make_regex(
         ChineseDateTime.ParserConfigurationSinceSuffix)
     self._since_prefix_regex = make_regex(
         ChineseDateTime.ParserConfigurationSincePrefix)
     self._until_regex = make_regex(ChineseDateTime.ParserConfigurationUntil)
     self._after_regex = make_regex(ChineseDateTime.AfterRegex)
     self._before_regex = make_regex(ChineseDateTime.BeforeRegex)
     self._ambiguity_filters_dict = ChineseDateTime.AmbiguityFiltersDict

     # Extractors.
     self._date_extractor = ChineseDateExtractor()
     self._time_extractor = ChineseTimeExtractor()
     self._date_time_extractor = ChineseDateTimeExtractor()
     self._date_period_extractor = ChineseDatePeriodExtractor()
     self._time_period_extractor = ChineseTimePeriodExtractor()
     self._date_time_period_extractor = ChineseDateTimePeriodExtractor()
     holiday_config = ChineseHolidayExtractorConfiguration()
     self._holiday_extractor = BaseHolidayExtractor(holiday_config)
     self._duration_extractor = ChineseDurationExtractor()
     self._set_extractor = ChineseSetExtractor()

     # TODO When the implementation for these properties is added, change the None values to their respective Regexps
     self._superfluous_word_matcher = None
     self._fail_fast_regex = None
     self._unspecified_date_period_regex = None
     self._suffix_after_regex = None
     self._potential_ambiguous_range_regex = None
     self._ambiguous_range_modifier_prefix = None
     self._around_regex = None
     self._term_filter_regexes = None
     self._datetime_alt_extractor = None
     self._time_zone_extractor = None
 def __init__(self):
     """Build the regexes and one parser per entity kind; attributes
     without an implementation yet stay ``None``.
     """
     make_regex = RegExpUtility.get_safe_reg_exp

     # Regex objects.
     self._equal_regex = make_regex(BaseDateTime.EqualRegex)
     self._year_regex = make_regex(ChineseDateTime.YearRegex)
     self._before_regex = make_regex(ChineseDateTime.MergedBeforeRegex)
     self._after_regex = make_regex(ChineseDateTime.MergedAfterRegex)
     # Built from the same MergedAfterRegex pattern as _after_regex.
     self._since_regex = make_regex(ChineseDateTime.MergedAfterRegex)

     # Parsers.
     self._date_parser = ChineseDateParser()
     self._holiday_parser = ChineseHolidayParser()
     self._time_parser = ChineseTimeParser()
     self._date_time_parser = ChineseDateTimeParser()
     self._date_period_parser = ChineseDatePeriodParser()
     self._time_period_parser = ChineseTimePeriodParser()
     self._date_time_period_parser = ChineseDateTimePeriodParser()
     self._duration_parser = ChineseDurationParser()
     self._set_parser = ChineseSetParser()

     # TODO When the implementation for these properties is added, change the None values to their respective Regexps
     self._around_regex = None
     self._suffix_after = None
 def __init__(self):
     """Initialise the base class with the Chinese date-time-period parser
     configuration and attach the extra regexes and cardinal helpers.
     """
     super().__init__(ChineseDateTimePeriodParserConfiguration())
     make_regex = RegExpUtility.get_safe_reg_exp

     self.tmo_regex = make_regex(ChineseDateTime.DateTimePeriodMORegex)
     self.tmi_regex = make_regex(ChineseDateTime.DateTimePeriodMIRegex)
     self.taf_regex = make_regex(ChineseDateTime.DateTimePeriodAFRegex)
     self.tev_regex = make_regex(ChineseDateTime.DateTimePeriodEVRegex)
     self.tni_regex = make_regex(ChineseDateTime.DateTimePeriodNIRegex)
     self.unit_regex = make_regex(ChineseDateTime.DateTimePeriodUnitRegex)
     self.time_of_day_regex = make_regex(ChineseDateTime.TimeOfDayRegex)

     self.cardinal_extractor = ChineseCardinalExtractor()
     number_config = ChineseNumberParserConfiguration()
     self.cardinal_parser = CJKNumberParser(number_config)
 def __init__(self):
     """Create the single-entity extractors and the regexes stored on this
     configuration object.
     """
     self._cardinal_extractor = ChineseCardinalExtractor()
     self._single_date_extractor = ChineseDateExtractor()
     self._single_time_extractor = ChineseTimeExtractor()
     self._single_date_time_extractor = ChineseDateTimeExtractor()

     make_regex = RegExpUtility.get_safe_reg_exp
     self._preposition_regex = make_regex(
         ChineseDateTime.DateTimePeriodPrepositionRegex)
     self._till_regex = make_regex(ChineseDateTime.DateTimePeriodTillRegex)
     self._specific_time_of_day_regex = make_regex(
         ChineseDateTime.SpecificTimeOfDayRegex)
     self._time_of_day_regex = make_regex(ChineseDateTime.TimeOfDayRegex)
     self._followed_unit = make_regex(
         ChineseDateTime.DateTimePeriodFollowedUnit)
     self._time_unit_regex = make_regex(
         ChineseDateTime.DateTimePeriodUnitRegex)
Beispiel #22
0
 def __init__(self):
     """Build the list of three holiday regexes, in a fixed order."""
     holiday_patterns = (
         ChineseDateTime.HolidayRegexList1,
         ChineseDateTime.HolidayRegexList2,
         ChineseDateTime.LunarHolidayRegex,
     )
     self._holiday_regexes = [
         RegExpUtility.get_safe_reg_exp(pattern)
         for pattern in holiday_patterns
     ]
 def __init__(self):
     """Populate regexes, helper objects and lookup maps on this
     configuration object; several attributes are left as ``None``.
     """
     make_regex = RegExpUtility.get_safe_reg_exp

     self._complex_dateperiod_regex = None
     self._relative_decade_regex = None
     self._relative_regex = make_regex(ChineseDateTime.RelativeRegex)

     # Helper extractors / parser.
     self._date_extractor = ChineseDateExtractor()
     self._date_parser = ChineseDateParser()
     self._duration_extractor = ChineseDurationExtractor()

     # Regexes built from the resource class.
     self._simple_cases_regex = make_regex(ChineseDateTime.SimpleCasesRegex)
     self._one_word_period_regex = make_regex(
         ChineseDateTime.OneWordPeriodRegex)
     self._year_regex = make_regex(ChineseDateTime.YearRegex)
     self._past_regex = make_regex(ChineseDateTime.PastRegex)
     self._future_regex = make_regex(ChineseDateTime.FutureRegex)
     self._week_of_month_regex = make_regex(ChineseDateTime.WeekOfMonthRegex)
     self._quarter_regex = make_regex(ChineseDateTime.QuarterRegex)
     self._season_regex = make_regex(ChineseDateTime.SeasonRegex)
     self._next_prefix_regex = make_regex(ChineseDateTime.DatePeriodNextRegex)
     self._past_prefix_regex = make_regex(ChineseDateTime.DatePeriodLastRegex)
     self._this_prefix_regex = make_regex(ChineseDateTime.DatePeriodThisRegex)

     # These two are built from the literal pattern r'\0' rather than a
     # ChineseDateTime resource.
     self._later_early_period_regex = make_regex(r'\0')
     self._week_with_week_day_range_regex = make_regex(r'\0')

     self._token_before_date = ' on '

     # Lookup maps taken directly from the resource class.
     self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
     self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
     self._cardinal_map = ChineseDateTime.ParserConfigurationCardinalMap
     self._season_map = ChineseDateTime.ParserConfigurationSeasonMap
     self._unit_map = ChineseDateTime.ParserConfigurationUnitMap

     self._now_regex = make_regex(ChineseDateTime.NowRegex)

     # TODO When the implementation for these properties is added, change the None values to their respective Regexps
     self._reference_date_period_regex = make_regex(r'\0')
     self._decade_with_century_regex = None
     self._later_regex = None
     self._ago_regex = None
    def __init__(self):
        """Build the ordered date-regex list plus the other regexes, lookup
        maps and number helpers stored on this configuration object.

        The relative order of ``DateRegexList6`` and ``DateRegexList7``
        depends on ``ChineseDateTime.DefaultLanguageFallback``.
        """
        make_regex = RegExpUtility.get_safe_reg_exp

        leading_patterns = (
            ChineseDateTime.DateRegexList1,
            ChineseDateTime.DateRegexList2,
            ChineseDateTime.DateRegexList3,
            ChineseDateTime.DateRegexList4,
            ChineseDateTime.DateRegexList5,
        )
        self._date_regex = [make_regex(pattern) for pattern in leading_patterns]

        # With the DMY fallback, list 7 is placed ahead of list 6; otherwise
        # list 6 comes first.
        dmy_fallback = (ChineseDateTime.DefaultLanguageFallback
                        == Constants.DEFAULT_LANGUAGE_FALLBACK_DMY)
        if dmy_fallback:
            fallback_patterns = (ChineseDateTime.DateRegexList7,
                                 ChineseDateTime.DateRegexList6)
        else:
            fallback_patterns = (ChineseDateTime.DateRegexList6,
                                 ChineseDateTime.DateRegexList7)
        for pattern in fallback_patterns:
            self._date_regex.append(make_regex(pattern))

        self._date_regex.append(make_regex(ChineseDateTime.DateRegexList8))

        # Lookup maps taken directly from the resource class.
        self._month_of_year = ChineseDateTime.ParserConfigurationMonthOfYear
        self._day_of_month = ChineseDateTime.ParserConfigurationDayOfMonth
        self._day_of_week = ChineseDateTime.ParserConfigurationDayOfWeek

        self._special_day_regex = make_regex(ChineseDateTime.SpecialDayRegex)
        self._special_day_with_num_regex = make_regex(
            ChineseDateTime.SpecialDayWithNumRegex)
        self._this_regex = make_regex(ChineseDateTime.DateThisRegex)
        self._next_regex = make_regex(ChineseDateTime.DateNextRegex)
        self._last_regex = make_regex(ChineseDateTime.DateLastRegex)
        self._unit_regex = make_regex(ChineseDateTime.DateUnitRegex)
        self._unit_map = ChineseDateTime.ParserConfigurationUnitMap
        self._week_day_of_month_regex = make_regex(
            ChineseDateTime.WeekDayOfMonthRegex)
        self._week_day_regex = make_regex(ChineseDateTime.WeekDayRegex)

        # Number helpers.
        self._integer_extractor = ChineseIntegerExtractor()
        number_config = ChineseNumberParserConfiguration()
        self._number_parser = CJKNumberParser(number_config)
class ChineseDateTimeExtractor(BaseDateTimeExtractor):
    """Date-time extractor configured with
    ``ChineseDateTimeExtractorConfiguration``.

    Candidate tokens come from four sources (adjacent date + time pairs, the
    inherited ``basic_regex_match``, time expressions preceded by a
    "time of today" prefix, and durations immediately followed by a
    before/after marker) and are merged with ``merge_all_tokens``.
    """

    before_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.BeforeRegex)
    after_regex = RegExpUtility.get_safe_reg_exp(ChineseDateTime.AfterRegex)
    date_time_period_unit_regex = RegExpUtility.get_safe_reg_exp(
        ChineseDateTime.DateTimePeriodUnitRegex)

    def __init__(self):
        """Set up the base extractor and the duration extractor used by
        ``duration_with_ago_and_later``."""
        super().__init__(ChineseDateTimeExtractorConfiguration())
        self.duration_extractor = ChineseDurationExtractor()

    def extract(self,
                source: str,
                reference: datetime = None) -> List[ExtractResult]:
        """Return the merged extraction results for ``source``.

        ``reference`` defaults to ``datetime.now()`` when it is ``None``.
        """
        if reference is None:
            reference = datetime.now()

        tokens: List[Token] = list()
        tokens.extend(self.merge_date_and_time(source, reference))
        tokens.extend(self.basic_regex_match(source))
        tokens.extend(self.time_of_today(source, reference))
        tokens.extend(self.duration_with_ago_and_later(source, reference))

        result = merge_all_tokens(tokens, source, self.extractor_type_name)
        return result

    def merge_date_and_time(self, source: str,
                            reference: datetime) -> List[Token]:
        """Return tokens spanning a date result followed by a time result.

        Date and time results are extracted separately and sorted by start
        offset. A date followed by the next non-overlapping time is turned
        into one token when the text between them (stripped and lower-cased)
        is accepted by ``config.is_connector_token``.
        """
        tokens: List[Token] = list()
        ers: List[ExtractResult] = self.config.date_point_extractor.extract(
            source, reference)

        if len(ers) < 1:
            return tokens

        ers.extend(self.config.time_point_extractor.extract(source, reference))

        if len(ers) < 2:
            return tokens

        ers = sorted(ers, key=lambda x: x.start)
        i = 0

        while i < len(ers) - 1:
            j = i + 1

            # Skip results that overlap the current one.
            while j < len(ers) and ers[i].overlap(ers[j]):
                j += 1

            if j >= len(ers):
                break

            # Compare by value: `is` tests object identity, which is not a
            # reliable way to compare strings.
            if ers[i].type == Constants.SYS_DATETIME_DATE \
                    and ers[j].type == Constants.SYS_DATETIME_TIME:
                middle_begin = ers[i].start + ers[i].length
                middle_end = ers[j].start

                if middle_begin > middle_end:
                    # Advance past this pair before skipping it; a bare
                    # `continue` would re-examine the same pair forever.
                    i = j + 1
                    continue

                middle = source[middle_begin:middle_end].strip().lower()

                if self.config.is_connector_token(middle):
                    begin = ers[i].start
                    end = ers[j].start + ers[j].length
                    tokens.append(Token(begin, end))
                # The pair is consumed whether or not it produced a token.
                i = j + 1
                continue
            i = j

        return tokens

    def time_of_today(self, source: str, reference: datetime) -> List[Token]:
        """Return tokens for time results preceded by a matching prefix.

        For each time result the text before it is searched with
        ``config.time_of_today_before_regex``. When ``config.night_regex``
        matches at the very start of the time text, that matched part is
        included in the searched prefix. A token is produced when only
        whitespace lies between the end of the prefix match and the end of
        the searched text; it runs from the prefix match start to the end of
        the time result.
        """
        tokens: List[Token] = list()
        ers = self.config.time_point_extractor.extract(source, reference)

        for er in ers:
            before = source[:er.start]
            inner_match = regex.search(self.config.night_regex, er.text)

            if inner_match is not None and inner_match.start() == 0:
                before = source[:er.start + len(inner_match.group())]

            if not before:
                continue

            match = regex.search(self.config.time_of_today_before_regex,
                                 before)
            if match is not None and not before[match.end():].strip():
                begin = match.start()
                end = er.start + er.length
                tokens.append(Token(begin, end))

        return tokens

    def duration_with_ago_and_later(self, source: str,
                                    reference: datetime) -> List[Token]:
        """Return tokens for durations directly followed by a before/after
        marker.

        Only the single character right after each duration is inspected; on
        a hit the token spans the duration plus that one character and
        carries metadata flagged as ``is_duration_with_ago_and_later``.
        """
        ret: List[Token] = list()
        duration_er = self.duration_extractor.extract(source, reference)

        for er in duration_er:
            pos = er.start + er.length
            if pos < len(source):
                suffix = source[pos]
                before_match = RegExpUtility.get_matches(
                    self.before_regex, suffix)
                after_match = RegExpUtility.get_matches(
                    self.after_regex, suffix)

                if (before_match and suffix.startswith(before_match[0])) \
                        or (after_match and suffix.startswith(after_match[0])):
                    meta_data = MetaData()
                    meta_data.is_duration_with_ago_and_later = True
                    ret.append(Token(er.start, pos + 1, meta_data))
        return ret
    def __init__(self):
        """Populate regexes, the simple-case regex list, and the extractor
        and parser helpers on this configuration object.

        Attributes without an implementation yet are set to ``None``.
        """
        make_regex = RegExpUtility.get_safe_reg_exp

        self._season_regex = make_regex(ChineseDateTime.SeasonRegex)
        self._month_suffix_regex = make_regex(ChineseDateTime.MonthSuffixRegex)
        self._year_regex_in_number = make_regex(
            ChineseDateTime.YearRegexInNumber)
        self._strict_year_regex = make_regex(ChineseDateTime.StrictYearRegex)
        self._last_regex = make_regex(ChineseDateTime.DatePeriodLastRegex)
        self._next_regex = make_regex(ChineseDateTime.DatePeriodNextRegex)
        self._this_regex = make_regex(ChineseDateTime.DatePeriodThisRegex)
        self._month_regex = make_regex(ChineseDateTime.MonthRegex)
        self._zero_to_nine_integer_regex_chinese = make_regex(
            ChineseDateTime.ZeroToNineIntegerRegexCJK)
        self._relative_month_regex = make_regex(
            ChineseDateTime.RelativeMonthRegex)
        self._day_regex_in_chinese = make_regex(
            ChineseDateTime.DatePeriodDayRegexInCJK)
        self._day_regex = make_regex(ChineseDateTime.DayRegex)

        # Order of this list is preserved exactly as originally declared.
        self._simple_cases_regexes = [
            make_regex(ChineseDateTime.SimpleCasesRegex),
            make_regex(ChineseDateTime.OneWordPeriodRegex),
            make_regex(ChineseDateTime.StrictYearRegex),
            make_regex(ChineseDateTime.YearToYear),
            make_regex(ChineseDateTime.YearToYearSuffixRequired),
            make_regex(ChineseDateTime.MonthToMonth),
            make_regex(ChineseDateTime.MonthToMonthSuffixRequired),
            make_regex(ChineseDateTime.YearAndMonth),
            make_regex(ChineseDateTime.PureNumYearAndMonth),
            make_regex(ChineseDateTime.DatePeriodYearInCJKRegex),
            make_regex(ChineseDateTime.WeekOfMonthRegex),
            make_regex(ChineseDateTime.SeasonWithYear),
            make_regex(ChineseDateTime.QuarterRegex),
            make_regex(ChineseDateTime.DecadeRegex),
        ]

        self._illegal_year_regex = make_regex(BaseDateTime.IllegalYearRegex)
        self._year_regex = make_regex(ChineseDateTime.YearRegex)
        self._till_regex = make_regex(ChineseDateTime.DatePeriodTillRegex)
        self._followed_unit = make_regex(ChineseDateTime.FollowedUnit)
        self._number_combined_with_unit = make_regex(
            ChineseDateTime.NumberCombinedWithUnit)
        self._past_regex = make_regex(ChineseDateTime.PastRegex)
        self._future_regex = make_regex(ChineseDateTime.FutureRegex)

        self._date_point_extractor = ChineseDateExtractor()
        self._integer_extractor = ChineseNumberExtractor()
        self._number_parser = BaseNumberParser(
            ChineseNumberParserConfiguration())

        self._now_regex = make_regex(ChineseDateTime.NowRegex)
        self._month_num_regex = make_regex(ChineseDateTime.MonthNumRegex)

        self._cardinal_extractor = ChineseCardinalExtractor()
        self._ordinal_extractor = ChineseOrdinalExtractor()

        # TODO When the implementation for these properties is added, change the None values to their respective Regexps
        # (_previous_prefix_regex was assigned twice originally; the
        # redundant second assignment has been dropped.)
        self._previous_prefix_regex = None
        self._check_both_before_after = None
        self._century_suffix_regex = None
        self._year_period_regex = None
        self._duration_date_restrictions = None
        self._more_than_regex = None
        self._less_than_regex = None
        self._later_regex = None
        self._ago_regex = None
        self._future_suffix_regex = None
        self._within_next_prefix_regex = None
        self._time_unit_regex = None
Beispiel #27
0
 def __init__(self, options: DateTimeOptions):
     """Initialise the merged extractor with the Chinese configuration.

     A fresh ``ChineseMergedExtractorConfiguration`` and the given
     ``options`` are handed to the parent initializer; afterwards the
     pattern kept in ``day_of_month_regex`` is compiled.

     :param options: forwarded unchanged to the parent initializer.
     """
     merged_config = ChineseMergedExtractorConfiguration()
     super().__init__(merged_config, options)

     # One or two digits at the very start of the text followed by '号';
     # compiled with the case-insensitive flag.
     day_of_month_pattern = '^\\d{1,2}号'
     self.day_of_month_regex = RegExpUtility.get_safe_reg_exp(
         day_of_month_pattern, regex.I)
Beispiel #28
0
 def __init__(self, regex_true, regex_false, token_regex, only_top_match):
     """Compile the three supplied patterns and remember the match flag.

     :param regex_true: pattern compiled into ``self.regex_true``.
     :param regex_false: pattern compiled into ``self.regex_false``.
     :param token_regex: pattern compiled into ``self.token_regex``.
     :param only_top_match: stored as given in ``self.only_top_match``.
     """
     compile_safe = RegExpUtility.get_safe_reg_exp

     self.regex_true = compile_safe(regex_true)
     self.regex_false = compile_safe(regex_false)
     self.token_regex = compile_safe(token_regex)
     self.only_top_match = only_top_match
 def __init__(self):
     """Compile the date-period patterns and create the helper components.

     Populates the ``_simple_cases_regexes`` list, the individual
     ``_*_regex`` attributes, the date-point and integer extractors and
     the number parser used by this configuration.
     """
     to_regex = RegExpUtility.get_safe_reg_exp

     # The list order is kept exactly as declared here.
     simple_case_patterns = (
         ChineseDateTime.SimpleCasesRegex,
         ChineseDateTime.OneWordPeriodRegex,
         ChineseDateTime.StrictYearRegex,
         ChineseDateTime.YearToYear,
         ChineseDateTime.YearToYearSuffixRequired,
         ChineseDateTime.YearAndMonth,
         ChineseDateTime.PureNumYearAndMonth,
         ChineseDateTime.DatePeriodYearInChineseRegex,
         ChineseDateTime.WeekOfMonthRegex,
         ChineseDateTime.SeasonWithYear,
         ChineseDateTime.QuarterRegex,
     )
     self._simple_cases_regexes = [
         to_regex(pattern) for pattern in simple_case_patterns
     ]

     # Stand-alone patterns.
     self._illegal_year_regex = to_regex(BaseDateTime.IllegalYearRegex)
     self._year_regex = to_regex(ChineseDateTime.YearRegex)
     self._till_regex = to_regex(ChineseDateTime.DatePeriodTillRegex)
     self._followed_unit = to_regex(ChineseDateTime.FollowedUnit)
     self._number_combined_with_unit = to_regex(
         ChineseDateTime.NumberCombinedWithUnit)
     self._past_regex = to_regex(ChineseDateTime.PastRegex)
     self._future_regex = to_regex(ChineseDateTime.FutureRegex)

     # Collaborating extractors and the number parser.
     self._date_point_extractor = ChineseDateExtractor()
     self._integer_extractor = ChineseNumberExtractor()
     number_parser_config = ChineseNumberParserConfiguration()
     self._number_parser = BaseNumberParser(number_parser_config)
    def __init__(self):
        """Compile the explicit and implicit date patterns.

        ``_date_regex_list`` holds patterns 1-5, then patterns 6 and 7 in an
        order that depends on ``ChineseDateTime.DefaultLanguageFallback``,
        then pattern 8. ``_implicit_date_list`` holds the remaining
        patterns in the order declared below.
        """
        compile_safe = RegExpUtility.get_safe_reg_exp

        date_patterns = [
            ChineseDateTime.DateRegexList1,
            ChineseDateTime.DateRegexList2,
            ChineseDateTime.DateRegexList3,
            ChineseDateTime.DateRegexList4,
            ChineseDateTime.DateRegexList5,
        ]

        # Pattern 7 precedes pattern 6 only when the fallback is DMY.
        if ChineseDateTime.DefaultLanguageFallback == Constants.DEFAULT_LANGUAGE_FALLBACK_DMY:
            date_patterns += [
                ChineseDateTime.DateRegexList7,
                ChineseDateTime.DateRegexList6,
            ]
        else:
            date_patterns += [
                ChineseDateTime.DateRegexList6,
                ChineseDateTime.DateRegexList7,
            ]

        date_patterns.append(ChineseDateTime.DateRegexList8)
        self._date_regex_list = [
            compile_safe(pattern) for pattern in date_patterns
        ]

        implicit_patterns = (
            ChineseDateTime.LunarRegex,
            ChineseDateTime.SpecialDayRegex,
            ChineseDateTime.DateThisRegex,
            ChineseDateTime.DateLastRegex,
            ChineseDateTime.DateNextRegex,
            ChineseDateTime.WeekDayRegex,
            ChineseDateTime.WeekDayOfMonthRegex,
            ChineseDateTime.SpecialDate,
        )
        self._implicit_date_list = [
            compile_safe(pattern) for pattern in implicit_patterns
        ]