def __init__(self):
     """Populate the Spanish date-time utility patterns.

     Every attribute stores the result of
     ``RegExpUtility.get_safe_reg_exp`` applied to the ``SpanishDateTime``
     pattern source of the corresponding name.
     """
     build = RegExpUtility.get_safe_reg_exp

     self._later_regex = build(SpanishDateTime.LaterRegex)
     self._ago_regex = build(SpanishDateTime.AgoRegex)
     self._in_connector_regex = build(SpanishDateTime.InConnectorRegex)
     self._range_unit_regex = build(SpanishDateTime.RangeUnitRegex)

     self._am_desc_regex = build(SpanishDateTime.AmDescRegex)
     # NOTE(review): the doubled underscore in ``_pm_desc__regex`` looks like
     # a typo; the name is kept unchanged because the code that reads this
     # attribute is not visible from this block.
     self._pm_desc__regex = build(SpanishDateTime.PmDescRegex)
     self._am_pm_desc_regex = build(SpanishDateTime.AmPmDescRegex)

     self._time_unit_regex = build(SpanishDateTime.TimeUnitRegex)
     self._within_next_prefix_regex = build(
         SpanishDateTime.WithinNextPrefixRegex)
     self._common_date_prefix_regex = build(
         SpanishDateTime.CommonDatePrefixRegex)
    def __init__(self, mode):
        """Build the list of Portuguese fraction patterns and their type tags.

        :param mode: a ``NumberMode`` value; unless it equals
            ``NumberMode.Unit`` the ``FractionPrepositionRegex`` entry is
            added after the four entries that are always present.
        """
        tagged_sources = [
            (PortugueseNumeric.FractionNotationWithSpacesRegex, 'FracNum'),
            (PortugueseNumeric.FractionNotationRegex, 'FracNum'),
            (PortugueseNumeric.FractionNounRegex, 'FracPor'),
            (PortugueseNumeric.FractionNounWithArticleRegex, 'FracPor'),
        ]

        if mode != NumberMode.Unit:
            tagged_sources.append(
                (PortugueseNumeric.FractionPrepositionRegex, 'FracPor'))

        # Each entry pairs the pattern object with the tag it is reported as.
        self.__regexes = [
            ReVal(re=RegExpUtility.get_safe_reg_exp(source), val=tag)
            for source, tag in tagged_sources
        ]
    def __init__(self, culture_info=None):
        """Set up the Chinese number parser configuration.

        :param culture_info: culture to store on the instance; when ``None``
            a ``CultureInfo`` for ``Culture.Chinese`` is created instead.
        """
        build = RegExpUtility.get_safe_reg_exp

        self._culture_info = (
            CultureInfo(Culture.Chinese) if culture_info is None
            else culture_info)

        # Plain tokens and markers copied from the ChineseNumeric resources.
        self._lang_marker = ChineseNumeric.LangMarker
        self._decimal_separator_char = ChineseNumeric.DecimalSeparatorChar
        self._fraction_marker_token = ChineseNumeric.FractionMarkerToken
        self._non_decimal_separator_char = ChineseNumeric.NonDecimalSeparatorChar
        self._half_a_dozen_text = ChineseNumeric.HalfADozenText
        self._word_separator_token = ChineseNumeric.WordSeparatorToken

        self._round_number_map = ChineseNumeric.RoundNumberMap
        self._digital_number_regex = build(ChineseNumeric.DigitalNumberRegex)

        # Lookup tables exposed as public attributes.
        self.zero_to_nine_map_chs = ChineseNumeric.ZeroToNineMap
        self.round_number_map_chs = ChineseNumeric.RoundNumberMapChar
        self.full_to_half_map_chs = ChineseNumeric.FullToHalfMap
        self.trato_sim_map_chs = ChineseNumeric.TratoSimMap
        self.unit_map_chs = ChineseNumeric.UnitMap
        self.round_direct_list_chs = ChineseNumeric.RoundDirectList

        # These four are stored exactly as defined on ChineseNumeric; they are
        # not passed through get_safe_reg_exp in this block.
        self.digit_num_regex = ChineseNumeric.DigitNumRegex
        self.dozen_regex = ChineseNumeric.DozenRegex
        self.percentage_regex = ChineseNumeric.PercentageRegex
        self.point_regex_chs = ChineseNumeric.PointRegex

        # The remaining patterns go through get_safe_reg_exp.
        self.double_and_round_chs_regex = build(
            ChineseNumeric.DoubleAndRoundRegex)
        self.frac_split_regex = build(ChineseNumeric.FracSplitRegex)
        self._negative_number_sign_regex = build(
            ChineseNumeric.NegativeNumberSignRegex)
        self.spe_get_number_regex = build(ChineseNumeric.SpeGetNumberRegex)
        self.pair_regex = build(ChineseNumeric.PairRegex)
Exemple #4
0
 def __init__(self, config: BaseDateParserConfiguration):
     """Set up the English set-parser configuration.

     :param config: shared configuration whose extractors and parsers are
         copied onto this instance under the same name with a leading
         underscore.
     """
     shared_components = (
         'duration_extractor', 'duration_parser',
         'time_extractor', 'time_parser',
         'date_extractor', 'date_parser',
         'date_time_extractor', 'date_time_parser',
         'date_period_extractor', 'date_period_parser',
         'time_period_extractor', 'time_period_parser',
         'date_time_period_extractor', 'date_time_period_parser',
     )
     for component in shared_components:
         setattr(self, f'_{component}', getattr(config, component))

     self._unit_map = EnglishDateTime.UnitMap

     # Patterns built from the EnglishDateTime resources.
     build = RegExpUtility.get_safe_reg_exp
     self._each_prefix_regex = build(EnglishDateTime.EachPrefixRegex)
     self._periodic_regex = build(EnglishDateTime.PeriodicRegex)
     self._each_unit_regex = build(EnglishDateTime.EachUnitRegex)
     self._each_day_regex = build(EnglishDateTime.EachDayRegex)
     self._set_week_day_regex = build(EnglishDateTime.SetWeekDayRegex)
     self._set_each_regex = build(EnglishDateTime.SetEachRegex)
Exemple #5
0
    def __init__(self, mode: NumberMode = NumberMode.DEFAULT):
        """Assemble the French number patterns and ambiguity filters.

        :param mode: ``NumberMode.PURE_NUMBER`` builds the cardinal extractor
            with ``FrenchNumeric.PlaceHolderPureNumber``;
            ``NumberMode.CURRENCY`` puts the currency pattern first;
            ``NumberMode.Unit`` leaves the ambiguity filter list empty.
        """
        build = RegExpUtility.get_safe_reg_exp

        self.__negative_number_terms = build(
            FrenchNumeric.NegativeNumberTermsRegex)
        self.__regexes: List[ReVal] = []

        if mode is NumberMode.PURE_NUMBER:
            cardinal_ex = FrenchCardinalExtractor(
                FrenchNumeric.PlaceHolderPureNumber)
        else:
            if mode is NumberMode.CURRENCY:
                self.__regexes.append(
                    ReVal(re=build(FrenchNumeric.CurrencyRegex),
                          val='IntegerNum'))
            cardinal_ex = FrenchCardinalExtractor()

        # Cardinal patterns first, then the fraction patterns.
        self.__regexes.extend(cardinal_ex.regexes)
        self.__regexes.extend(FrenchFractionExtractor(mode).regexes)

        filters: List[ReRe] = []
        if mode != NumberMode.Unit:
            # Shared filters come before the French-specific ones.
            for filter_sources in (BaseNumbers.AmbiguityFiltersDict,
                                   FrenchNumeric.AmbiguityFiltersDict):
                filters.extend(
                    ReRe(reKey=build(key), reVal=build(value))
                    for key, value in filter_sources.items())
        self.__ambiguity_filters_dict = filters
Exemple #6
0
    def __init__(self, config: NumberParserConfiguration):
        """Store the configuration and derive the patterns the parser uses.

        :param config: language-specific number parser configuration; its
            word separator token, cardinal/ordinal/round number maps and
            culture information are read here.
        """
        self.config: NumberParserConfiguration = config
        self.supported_types: List[str] = []

        cfg = self.config
        separator = cfg.word_separator_token
        cardinal_keys = self._get_key_regex(cfg.cardinal_number_map.keys())
        ordinal_keys = self._get_key_regex(cfg.ordinal_number_map.keys())

        # Alternation of: word separator, " -", cardinal keys, ordinal keys.
        single_int_frac = f'{separator}| -|{cardinal_keys}|{ordinal_keys}'
        self.text_number_regex: Pattern = self._get_text_number_regex(
            single_int_frac)

        self.arabic_number_regex: Pattern = RegExpUtility.get_safe_reg_exp(
            r'\d+', flags=regex.I | regex.S)
        self.round_number_set: List[str] = list(cfg.round_number_map.keys())

        # True when the configured culture code is listed among the variants
        # that use a non-standard separator.
        variants = cfg.non_standard_separator_variants
        self.is_non_standard_separator_variant = cfg.culture_info.code in variants
Exemple #7
0
 def __init__(self):
     """Populate the French duration extractor patterns.

     Each pattern attribute is the result of
     ``RegExpUtility.get_safe_reg_exp`` on a ``FrenchDateTime`` source; a
     ``FrenchCardinalExtractor`` is created for number extraction.
     """
     build = RegExpUtility.get_safe_reg_exp

     self._all_regex: Pattern = build(FrenchDateTime.AllRegex)
     self._half_regex: Pattern = build(FrenchDateTime.HalfRegex)
     self._followed_unit: Pattern = build(
         FrenchDateTime.DurationFollowedUnit)
     self._number_combined_with_unit: Pattern = build(
         FrenchDateTime.NumberCombinedWithDurationUnit)
     self._an_unit_regex: Pattern = build(FrenchDateTime.AnUnitRegex)
     self._inexact_number_unit_regex: Pattern = build(
         FrenchDateTime.InexactNumberUnitRegex)
     self._suffix_and_regex: Pattern = build(FrenchDateTime.SuffixAndRegex)
     self._relative_duration_unit_regex: Pattern = build(
         FrenchDateTime.RelativeDurationUnitRegex)
     self._more_than_regex: Pattern = build(FrenchDateTime.MoreThanRegex)
     # Note the source name: ``_less_than_regex`` is built from
     # ``FrenchDateTime.LessThanOneHour``.
     self._less_than_regex: Pattern = build(FrenchDateTime.LessThanOneHour)

     self._cardinal_extractor: BaseNumberExtractor = FrenchCardinalExtractor()
Exemple #8
0
    def __skip_non_decimal_separator(self, ch: str, distance: int,
                                     culture: CultureInfo) -> bool:
        """Tell whether ``ch`` is a non-decimal separator that should be skipped.

        :param ch: character under inspection.
        :param distance: value compared against the fixed decimal length (3).
        :param culture: culture whose ``code`` is tested against the
            en/es/fr prefix pattern.
        :return: ``False`` when ``ch`` is not the configured non-decimal
            separator; otherwise ``True`` unless ``distance`` is at most 3
            and the culture code matches the pattern.
        """
        decimal_length: int = 3

        # Special cases for multi-language countries where decimal separators
        # can be used interchangeably, mostly informally.
        # Ex: South Africa, Namibia; Puerto Rico in ES; or in Canada for EN and FR.
        # "me pidio $5.00 prestados" and "me pidio $5,00 prestados" -> currency $5
        culture_regex: Pattern = RegExpUtility.get_safe_reg_exp(
            r'^(en|es|fr)(-)?\b', flags=regex.I | regex.S)

        if not ch == self.config.non_decimal_separator_char:
            return False

        if not distance <= decimal_length:
            return True

        return not culture_regex.match(culture.code)
    def __init__(self, mode: NumberMode = NumberMode.DEFAULT):
        """Assemble the English number patterns.

        :param mode: ``NumberMode.PURE_NUMBER`` builds the cardinal extractor
            with ``EnglishNumeric.PlaceHolderPureNumber``;
            ``NumberMode.CURRENCY`` puts the currency pattern first; any
            other value uses the default cardinal extractor only.
        """
        build = RegExpUtility.get_safe_reg_exp

        self.__negative_number_terms = build(
            EnglishNumeric.NegativeNumberTermsRegex)
        self.__regexes: List[ReVal] = []

        if mode is NumberMode.PURE_NUMBER:
            cardinal_ex = EnglishCardinalExtractor(
                EnglishNumeric.PlaceHolderPureNumber)
        else:
            if mode is NumberMode.CURRENCY:
                self.__regexes.append(
                    ReVal(re=build(EnglishNumeric.CurrencyRegex),
                          val='IntegerNum'))
            cardinal_ex = EnglishCardinalExtractor()

        # Cardinal patterns first, then the fraction patterns.
        self.__regexes.extend(cardinal_ex.regexes)
        self.__regexes.extend(EnglishFractionExtractor().regexes)
    def __init__(self):
        """Set up the Portuguese time-period extractor configuration.

        Creates the helper extractors, builds the pattern objects from the
        ``PortugueseDateTime`` resources and stores the remaining constants.
        """
        super().__init__()
        build = RegExpUtility.get_safe_reg_exp

        self._single_time_extractor = BaseTimeExtractor(
            PortugueseTimeExtractorConfiguration())
        self._integer_extractor = PortugueseIntegerExtractor()
        self.utility_configuration = PortugueseDateTimeUtilityConfiguration()

        self._simple_cases_regex: List[Pattern] = [
            build(PortugueseDateTime.PureNumFromTo),
            build(PortugueseDateTime.PureNumBetweenAnd),
            build(PortugueseDateTime.SpecificTimeFromTo),
            build(PortugueseDateTime.SpecificTimeBetweenAnd),
        ]

        self._till_regex: Pattern = build(PortugueseDateTime.TillRegex)
        self._time_of_day_regex: Pattern = build(
            PortugueseDateTime.TimeOfDayRegex)
        self._general_ending_regex: Pattern = build(
            PortugueseDateTime.GeneralEndingRegex)

        self.from_regex = build(PortugueseDateTime.FromRegex)
        self.range_connector_regex = build(
            PortugueseDateTime.RangeConnectorRegex)
        self.between_regex = build(PortugueseDateTime.BetweenRegex)

        self._token_before_date = PortugueseDateTime.TokenBeforeDate

        # The two pure-number forms, stored as raw pattern sources (not
        # passed through get_safe_reg_exp). The list previously named
        # PureNumFromTo twice, leaving the "between ... and ..." form out;
        # it now mirrors the pure-number pair in _simple_cases_regex.
        self._pure_number_regex = [
            PortugueseDateTime.PureNumFromTo,
            PortugueseDateTime.PureNumBetweenAnd,
        ]

        self._options = DateTimeOptions.NONE
        self._time_zone_extractor = BaseTimeZoneExtractor(
            PortugueseTimeZoneExtractorConfiguration())
        self._check_both_before_after = PortugueseDateTime.CheckBothBeforeAfter
 def __init__(self):
     """Build the list of French fraction patterns and their type tags."""
     tagged_sources = (
         (FrenchNumeric.FractionNotationWithSpacesRegex, 'FracNum'),
         (FrenchNumeric.FractionNotationRegex, 'FracNum'),
         (FrenchNumeric.FractionNounRegex, 'FracFr'),
         (FrenchNumeric.FractionNounWithArticleRegex, 'FracFr'),
         (FrenchNumeric.FractionPrepositionRegex, 'FracFr'),
     )

     # Each entry pairs the pattern object with the tag it is reported as.
     self.__regexes = [
         ReVal(re=RegExpUtility.get_safe_reg_exp(source), val=tag)
         for source, tag in tagged_sources
     ]
Exemple #12
0
    def __init__(self, culture_info=None):
        """Set up the Spanish number parser configuration.

        :param culture_info: culture to store on the instance; when ``None``
            a ``CultureInfo`` for ``Culture.Spanish`` is created instead.
        """
        build = RegExpUtility.get_safe_reg_exp

        self._culture_info = (
            CultureInfo(Culture.Spanish) if culture_info is None
            else culture_info)

        # Plain tokens and markers copied from the SpanishNumeric resources.
        self._lang_marker = SpanishNumeric.LangMarker
        self._decimal_separator_char = SpanishNumeric.DecimalSeparatorChar
        self._fraction_marker_token = SpanishNumeric.FractionMarkerToken
        self._non_decimal_separator_char = SpanishNumeric.NonDecimalSeparatorChar
        self._half_a_dozen_text = SpanishNumeric.HalfADozenText
        self._word_separator_token = SpanishNumeric.WordSeparatorToken

        self._written_decimal_separator_texts = SpanishNumeric.WrittenDecimalSeparatorTexts
        self._written_group_separator_texts = SpanishNumeric.WrittenGroupSeparatorTexts
        self._written_integer_separator_texts = SpanishNumeric.WrittenIntegerSeparatorTexts
        self._written_fraction_separator_texts = SpanishNumeric.WrittenFractionSeparatorTexts
        self._non_standard_separator_variants = SpanishNumeric.NonStandardSeparatorVariants
        self._is_multi_decimal_separator_culture = SpanishNumeric.MultiDecimalSeparatorCulture

        # Start from a copy of the ordinal map, then add every
        # prefix + suffix combination that is not already present, valued as
        # the product of the two parts.
        ordinal_number_map: Dict[str, int] = dict(
            SpanishNumeric.OrdinalNumberMap)
        for prefix_text, prefix_value in SpanishNumeric.PrefixCardinalMap.items():
            for suffix_text, suffix_value in SpanishNumeric.SuffixOrdinalMap.items():
                ordinal_number_map.setdefault(
                    prefix_text + suffix_text, prefix_value * suffix_value)

        self._cardinal_number_map = SpanishNumeric.CardinalNumberMap
        self._ordinal_number_map = ordinal_number_map
        self._round_number_map = SpanishNumeric.RoundNumberMap

        self._negative_number_sign_regex = build(
            SpanishNumeric.NegativeNumberSignRegex)
        self._half_a_dozen_regex = build(SpanishNumeric.HalfADozenRegex)
        self._digital_number_regex = build(SpanishNumeric.DigitalNumberRegex)
 def adjust_by_suffix(self, suffix: str, adjust: AdjustParams):
     """Shift ``adjust.hour`` and set the AM/PM flags from a time suffix.

     The suffix is stripped and lower-cased, then searched with
     ``self.time_suffix_full``. Adjustments are applied only when the match
     starts at position 0, covers the whole suffix and has no ``oclock``
     group. ``adjust.hour`` is always normalised modulo 24 at the end.

     :param suffix: text that follows the hour.
     :param adjust: mutable holder whose ``hour``, ``has_am`` and ``has_pm``
         fields are updated in place.
     """
     suffix = suffix.strip().lower()
     delta_hour = 0
     match = regex.search(self.time_suffix_full, suffix)
     whole_suffix_matched = (
         match is not None and match.start() == 0
         and match.group() == suffix)

     if whole_suffix_matched and not RegExpUtility.get_group(match, 'oclock'):
         if RegExpUtility.get_group(match, 'am'):
             if adjust.hour >= 12:
                 delta_hour -= 12
             else:
                 adjust.has_am = True

         pm_str = RegExpUtility.get_group(match, 'pm')
         if pm_str:
             if adjust.hour < 12:
                 delta_hour = 12

             if regex.search(self.lunch_regex, pm_str):
                 # Hours 10 to 12 (inclusive) are left unshifted: 12 is
                 # flagged as PM, 10 and 11 as AM.
                 if 10 <= adjust.hour <= 12:
                     delta_hour = 0
                     if adjust.hour == 12:
                         adjust.has_pm = True
                     else:
                         adjust.has_am = True
                 else:
                     adjust.has_pm = True
             elif regex.search(self.night_regex, pm_str):
                 # Hours up to 3, and 12, are left unshifted and flagged as
                 # AM; 12 is rewritten to 0.
                 if adjust.hour <= 3 or adjust.hour == 12:
                     if adjust.hour == 12:
                         adjust.hour = 0
                     delta_hour = 0
                     adjust.has_am = True
                 else:
                     adjust.has_pm = True
             else:
                 # A PM suffix that matches neither the lunch nor the night
                 # pattern is still an explicit PM marker. Without this flag
                 # an hour of 12 would be left with neither has_am nor
                 # has_pm set.
                 adjust.has_pm = True

     adjust.hour = (adjust.hour + delta_hour) % 24
Exemple #14
0
    def match_simple_cases(self, source: str) -> List[Token]:
        """Collect tokens for the simple time-period patterns found in ``source``.

        Every pattern in ``self.config.simple_cases_regex`` is applied with
        ``regex.finditer``. A match is kept when:

        * it has a minute or second group and is followed by nothing, by a
          general ending, by the date-introducing token, or carries a
          right-hand am/pm group; or
        * it has no minute/second group but carries a pm, am or description
          group, or the ``ENABLE_PREVIEW`` option is set.

        Token offsets are taken from the match itself (``match.start()`` and
        ``match.end()``). The earlier ``source.index(match.group())`` lookup
        returned the first occurrence of the matched text, which is the
        wrong position when the same text appears more than once.

        :param source: text to scan.
        :return: list of ``Token(start, end)`` for the accepted matches.
        """
        result = []

        for regexp in self.config.simple_cases_regex:
            for match in regex.finditer(regexp, source):
                start, end = match.start(), match.end()

                # Cases like "from 10:30 to 11" don't necessarily need "am/pm".
                if RegExpUtility.get_group(match, Constants.MINUTE_GROUP_NAME) or \
                        RegExpUtility.get_group(match, Constants.SECOND_GROUP_NAME):

                    # Cases like "from 3:30 to 4" should be supported.
                    # Cases like "from 3:30 to 5 on 1/1/2015" should be supported.
                    # Cases like "from 3:30 to 4 people" are considered not valid.
                    if end == len(source):
                        # No extra tokens after the time period.
                        end_with_valid_token = True
                    else:
                        after_str = source[end:]
                        # TODO: accept a trailing time zone under
                        # ENABLE_PREVIEW once TimeZone support is migrated.
                        end_with_valid_token = bool(
                            self.config.general_ending_regex.match(after_str)
                            or RegExpUtility.get_group(
                                match, Constants.RIGHT_AM_PM_GROUP_NAME)
                            or after_str.lstrip().startswith(
                                self.config.token_before_date))

                    if end_with_valid_token:
                        result.append(Token(start, end))
                else:
                    # Is there "pm", "am" or a description?
                    has_descriptor = (
                        RegExpUtility.get_group(match, Constants.PM_GROUP_NAME)
                        or RegExpUtility.get_group(match, Constants.AM_GROUP_NAME)
                        or RegExpUtility.get_group(match, Constants.DESC_GROUP_NAME))

                    # Without a descriptor the match is only accepted in
                    # preview mode (pending TimeZone migration).
                    if has_descriptor or \
                            (self.config.options & DateTimeOptions.ENABLE_PREVIEW) != 0:
                        result.append(Token(start, end))

        return result
 def __init__(self, config):
     """Set up the English duration parser configuration.

     :param config: accepted for interface compatibility; it is not read
         anywhere in this block.
     """
     build = RegExpUtility.get_safe_reg_exp

     self._cardinal_extractor: BaseNumberExtractor = EnglishCardinalExtractor()
     self._number_parser: BaseNumberParser = BaseNumberParser(
         EnglishNumberParserConfiguration())

     # Patterns built from the EnglishDateTime resources.
     self._followed_unit: Pattern = build(
         EnglishDateTime.DurationFollowedUnit)
     self._suffix_and_regex: Pattern = build(EnglishDateTime.SuffixAndRegex)
     self._number_combined_with_unit: Pattern = build(
         EnglishDateTime.NumberCombinedWithDurationUnit)
     self._an_unit_regex: Pattern = build(EnglishDateTime.AnUnitRegex)
     self._all_date_unit_regex: Pattern = build(EnglishDateTime.AllRegex)
     self._half_date_unit_regex: Pattern = build(EnglishDateTime.HalfRegex)
     self._inexact_number_unit_regex: Pattern = build(
         EnglishDateTime.InexactNumberUnitRegex)

     # Lookup tables taken as-is from the resources.
     self._unit_map: Dict[str, int] = EnglishDateTime.UnitMap
     self._unit_value_map: Dict[str, int] = EnglishDateTime.UnitValueMap
     self._double_numbers: Dict[str, float] = EnglishDateTime.DoubleNumbers
Exemple #16
0
    def __init__(self, config: BaseDateParserConfiguration):
        """Set up the Spanish date-time parser configuration.

        :param config: shared configuration whose extractors, parsers,
            number helpers, unit map and utility configuration are copied
            onto this instance under the same name with a leading underscore.
        """
        build = RegExpUtility.get_safe_reg_exp

        self._token_before_date = SpanishDateTime.TokenBeforeDate
        self._token_before_time = SpanishDateTime.TokenBeforeTime

        # Patterns built from the SpanishDateTime resources.
        self._now_regex = build(SpanishDateTime.NowRegex)
        self._am_time_regex = build(SpanishDateTime.AmTimeRegex)
        self._pm_time_regex = build(SpanishDateTime.PmTimeRegex)
        self._simple_time_of_today_after_regex = build(
            SpanishDateTime.SimpleTimeOfTodayAfterRegex)
        self._simple_time_of_today_before_regex = build(
            SpanishDateTime.SimpleTimeOfTodayBeforeRegex)
        self._specific_time_of_day_regex = build(
            SpanishDateTime.SpecificTimeOfDayRegex)
        self._the_end_of_regex = build(SpanishDateTime.TheEndOfRegex)
        self._unit_regex = build(SpanishDateTime.UnitRegex)

        # These two are stored without a leading underscore.
        self.next_prefix_regex = build(SpanishDateTime.NextPrefixRegex)
        self.past_prefix_regex = build(SpanishDateTime.PastPrefixRegex)

        shared_components = (
            'date_extractor', 'time_extractor',
            'date_parser', 'time_parser',
            'numbers', 'cardinal_extractor', 'number_parser',
            'duration_extractor', 'duration_parser',
            'unit_map', 'utility_configuration',
        )
        for component in shared_components:
            setattr(self, f'_{component}', getattr(config, component))
Exemple #17
0
 def adjust_by_prefix(self, prefix: str, adjust: AdjustParams):
     """Apply the minute offset expressed by an English time prefix.

     Recognised fixed prefixes are "half" (30), "a quarter"/"quarter" (15)
     and "three quarter" (45). Any other prefix is searched with
     ``self.less_than_one_hour``: a ``deltamin`` group is read as an
     integer, otherwise the ``deltaminnum`` group is looked up in
     ``self.numbers``. A prefix ending in "to" negates the offset; a
     negative resulting minute borrows one hour.

     :param prefix: text that precedes the hour.
     :param adjust: mutable holder whose ``minute``, ``hour`` and
         ``has_minute`` fields are updated in place.
     :raises KeyError: if the ``deltaminnum`` text is missing from
         ``self.numbers``.
     """
     prefix = prefix.strip().lower()
     delta_min = 0

     if prefix.startswith('half'):
         delta_min = 30
     elif prefix.startswith(('a quarter', 'quarter')):
         delta_min = 15
     elif prefix.startswith('three quarter'):
         delta_min = 45
     else:
         match = regex.search(self.less_than_one_hour, prefix)
         # A prefix the pattern does not recognise leaves the offset at 0
         # instead of failing on a missing match object.
         if match:
             min_str = RegExpUtility.get_group(match, 'deltamin')
             if min_str:
                 delta_min = int(min_str)
             else:
                 min_str = RegExpUtility.get_group(
                     match, 'deltaminnum').lower()
                 delta_min = self.numbers[min_str]

     if prefix.endswith('to'):
         delta_min = delta_min * -1

     adjust.minute += delta_min
     if adjust.minute < 0:
         # Borrow from the hour when the offset points before it.
         adjust.minute += 60
         adjust.hour -= 1
     adjust.has_minute = True
    def adjust_by_prefix(self, prefix: str, adjust: AdjustParams):
        """Apply the minute offset expressed by a time prefix.

        The token patterns are tried in a fixed order against the raw
        ``prefix``; the first one that matches decides the offset. When none
        matches, the stripped, lower-cased prefix is searched with
        ``self.less_than_one_hour`` and the offset comes from its
        ``deltamin`` group (as an integer) or from ``self.numbers`` via the
        ``deltaminnum`` group. A prefix starting with "zum" negates the
        offset; a negative resulting minute borrows one hour.

        :param prefix: text that precedes the hour.
        :param adjust: mutable holder whose ``minute``, ``hour`` and
            ``has_minute`` fields are updated in place.
        """
        trimmed_prefix = prefix.strip().lower()
        delta_min = 0

        # @todo Move hardcoded strings to resource YAML file.
        # Order matters: the first matching pattern wins.
        token_offsets = (
            (self._half_token_regex, -30),
            (self._quarter_to_token_regex, -15),
            (self._quarter_past_token_regex, 15),
            (self._three_quarter_to_token_regex, -45),
            (self._three_quarter_past_token_regex, 45),
        )
        for token_regex, minutes in token_offsets:
            if regex.search(token_regex, prefix):
                delta_min = minutes
                break
        else:
            match = regex.search(self.less_than_one_hour, trimmed_prefix)
            if match:
                min_str = RegExpUtility.get_group(match, 'deltamin')
                if min_str:
                    delta_min = int(min_str)
                else:
                    min_str = RegExpUtility.get_group(
                        match, 'deltaminnum').lower()
                    delta_min = self.numbers.get(min_str)

        if trimmed_prefix.startswith('zum'):
            delta_min = delta_min * -1

        adjust.minute += delta_min

        if adjust.minute < 0:
            # Borrow from the hour when the offset points before it.
            adjust.minute += 60
            adjust.hour -= 1

        adjust.has_minute = True
Exemple #19
0
    def adjust_by_suffix(self, suffix: str, adjust: AdjustParams):
        """Shift ``adjust.hour`` and set the AM/PM flags from a time suffix.

        The suffix is stripped and lower-cased, then matched from its start
        with ``self.time_suffix``. Adjustments are applied only when the
        match covers the whole suffix and has no ``heures`` group.
        ``adjust.hour`` is always normalised modulo 24 at the end.

        :param suffix: text that follows the hour.
        :param adjust: mutable holder whose ``hour``, ``has_am`` and
            ``has_pm`` fields are updated in place.
        """
        normalized = suffix.strip().lower()
        hour_shift = 0

        found = regex.match(self.time_suffix, normalized)
        covers_whole_suffix = bool(found) and found.group() == normalized

        if covers_whole_suffix and not RegExpUtility.get_group(found, 'heures'):
            if RegExpUtility.get_group(found, 'am'):
                # An AM marker on an hour of 12 or more pulls it back by 12.
                if adjust.hour >= 12:
                    hour_shift -= 12
                adjust.has_am = True

            if RegExpUtility.get_group(found, 'pm'):
                # A PM marker on an hour below 12 pushes it forward by 12.
                if adjust.hour < 12:
                    hour_shift = 12
                adjust.has_pm = True

        adjust.hour = (adjust.hour + hour_shift) % 24
Exemple #20
0
    def _parse_week_of_month(self, source: str, reference: datetime) -> DateTimeResolutionResult:
        """Resolve a "week of month" expression that spans the whole ``source``.

        :param source: text to parse; ``self.config.week_of_month_regex``
            must find a match whose length equals the length of ``source``.
        :param reference: date used for the default month and year.
        :return: an empty ``DateTimeResolutionResult`` when the pattern does
            not cover the whole text, otherwise whatever
            ``self._get_week_of_month`` returns.
        """
        match = self.config.week_of_month_regex.search(source)
        if match is None or len(match.group()) != len(source):
            return DateTimeResolutionResult()

        cardinal_text = RegExpUtility.get_group(match, 'cardinal')
        month_text = RegExpUtility.get_group(match, 'month')

        # A "last" cardinal is represented as week 5; anything else is looked
        # up in the cardinal map.
        if self.config.is_last_cardinal(cardinal_text):
            cardinal = 5
        else:
            cardinal = self.config.cardinal_map.get(cardinal_text)

        if month_text:
            # Explicit month name: keep the reference year and flag that no
            # year was given.
            month = self.config.month_of_year.get(month_text)
            year = reference.year
            no_year = True
        else:
            # No month name: move the reference date by the month offset the
            # configuration derives from the text.
            swift = self.config.get_swift_day_or_month(source)
            shifted = reference + datedelta(months=swift)
            month = shifted.month
            year = shifted.year
            no_year = False

        return self._get_week_of_month(cardinal, month, year, reference, no_year)
 def __init__(self, placeholder: str = FrenchNumeric.PlaceHolderDefault):
     """Build the list of French integer patterns and their type tags.

     :param placeholder: text passed to
         ``FrenchNumeric.NumbersWithPlaceHolder`` and to
         ``self._generate_format_regex`` when building the patterns.
     """
     build = RegExpUtility.get_safe_reg_exp
     format_regex = self._generate_format_regex

     # Patterns reported as 'IntegerNum'. Only the first three are built
     # with an explicit flag argument.
     integer_num_patterns = [
         build(FrenchNumeric.NumbersWithPlaceHolder(placeholder), regex.I),
         build(FrenchNumeric.NumbersWithSuffix, regex.S),
         build(format_regex(LongFormatMode.INTEGER_DOT, placeholder),
               regex.V1),
         build(format_regex(LongFormatMode.INTEGER_BLANK, placeholder)),
         build(format_regex(LongFormatMode.INTEGER_NO_BREAK_SPACE,
                            placeholder)),
         build(FrenchNumeric.RoundNumberIntegerRegexWithLocks),
         build(FrenchNumeric.NumbersWithDozenSuffix),
     ]

     # Patterns reported with the language marker appended to 'Integer'.
     language_tag = f'Integer{FrenchNumeric.LangMarker}'
     language_patterns = [
         build(FrenchNumeric.AllIntRegexWithLocks),
         build(FrenchNumeric.AllIntRegexWithDozenSuffixLocks),
     ]

     entries = [ReVal(re=pattern, val='IntegerNum')
                for pattern in integer_num_patterns]
     entries += [ReVal(re=pattern, val=language_tag)
                 for pattern in language_patterns]
     self.__regexes = entries
 def __init__(self):
     """Set up the English merged-extractor configuration.

     Creates one extractor per entity kind, each with its English
     configuration, and builds the pattern objects from the
     ``EnglishDateTime`` resources.
     """
     self._integer_extractor = EnglishIntegerExtractor()

     # One base extractor per entity kind.
     self._date_extractor = BaseDateExtractor(
         EnglishDateExtractorConfiguration())
     self._time_extractor = BaseTimeExtractor(
         EnglishTimeExtractorConfiguration())
     self._duration_extractor = BaseDurationExtractor(
         EnglishDurationExtractorConfiguration())
     self._date_period_extractor = BaseDatePeriodExtractor(
         EnglishDatePeriodExtractorConfiguration())
     self._time_period_extractor = BaseTimePeriodExtractor(
         EnglishTimePeriodExtractorConfiguration())
     self._date_time_extractor = BaseDateTimeExtractor(
         EnglishDateTimeExtractorConfiguration())
     self._date_time_period_extractor = BaseDateTimePeriodExtractor(
         EnglishDateTimePeriodExtractorConfiguration())
     self._set_extractor = BaseSetExtractor(
         EnglishSetExtractorConfiguration())
     self._holiday_extractor = BaseHolidayExtractor(
         EnglishHolidayExtractorConfiguration())

     build = RegExpUtility.get_safe_reg_exp
     self._after_regex = build(EnglishDateTime.AfterRegex)
     self._before_regex = build(EnglishDateTime.BeforeRegex)
     self._since_regex = build(EnglishDateTime.SinceRegex)
     self._from_to_regex = build(EnglishDateTime.FromToRegex)
     self._single_ambiguous_month_regex = build(
         EnglishDateTime.SingleAmbiguousMonthRegex)
     self._preposition_suffix_regex = build(
         EnglishDateTime.PrepositionSuffixRegex)
     self._ambiguous_range_modifier_prefix = build(
         EnglishDateTime.AmbiguousRangeModifierPrefix)
     self._number_ending_pattern = build(
         EnglishDateTime.NumberEndingPattern)

     # Single-element list holding the OneOnOneRegex pattern.
     self._filter_word_regex_list = [build(EnglishDateTime.OneOnOneRegex)]
Exemple #23
0
 def __init__(self, placeholder):
     """Build the list of French double (decimal) patterns and their type tags.

     :param placeholder: text passed to the ``FrenchNumeric`` pattern
         factories and to ``self._generate_format_regex``.
     """
     format_regex = self._generate_format_regex

     tagged_sources = (
         (FrenchNumeric.DoubleDecimalPointRegex(placeholder), 'DoubleNum'),
         (FrenchNumeric.DoubleWithoutIntegralRegex(placeholder), 'DoubleNum'),
         (format_regex(LongFormatMode.DOUBLE_DOT_COMMA, placeholder),
          'DoubleNum'),
         (format_regex(LongFormatMode.DOUBLE_NO_BREAK_SPACE_COMMA,
                       placeholder),
          'DoubleNum'),
         (FrenchNumeric.DoubleWithMultiplierRegex, 'DoubleNum'),
         (FrenchNumeric.DoubleWithRoundNumber, 'DoubleNum'),
         (FrenchNumeric.DoubleAllFloatRegex, 'DoubleFr'),
         (FrenchNumeric.DoubleExponentialNotationRegex, 'DoublePow'),
         (FrenchNumeric.DoubleCaretExponentialNotationRegex, 'DoublePow'),
     )

     # Each entry pairs the pattern object with the tag it is reported as.
     self.__regexes = [
         ReVal(re=RegExpUtility.get_safe_reg_exp(source), val=tag)
         for source, tag in tagged_sources
     ]
Exemple #24
0
    def _merge_two_times_points(self, source: str, reference: datetime) -> DateTimeResolutionResult:
        """Resolve a text holding two date points into a single date range.

        The stripped text is run through ``config.date_extractor``. When
        fewer than two results come back, extraction is retried with
        ``config.token_before_date`` prepended and every result's ``start``
        is shifted back by the token length. The first two parsed results
        become the begin and end of the range.

        :param source: text to resolve.
        :param reference: reference date handed to the extractor and parser.
        :return: a ``DateTimeResolutionResult``; it is returned untouched
            when fewer than two dates can be extracted or parsed.
        """
        result = DateTimeResolutionResult()
        text = source.strip()
        config = self.config

        ers = config.date_extractor.extract(text, reference)
        if not ers or len(ers) < 2:
            # Second attempt: prefix the configured token, then undo the
            # offset it introduced in the extracted positions.
            token = config.token_before_date
            ers = config.date_extractor.extract(token + text, reference)
            for er in ers:
                er.start -= len(token)

            if not ers or len(ers) < 2:
                return result

        # A 'week' group found in the source is prepended to both date texts.
        week_match = config.week_with_week_day_range_regex.search(source)
        week_prefix = RegExpUtility.get_group(week_match, 'week') if week_match else None
        if week_prefix:
            for er in ers[:2]:
                er.text = f'{week_prefix} {er.text}'

        # Keep only the extractions the date parser could resolve.
        prs = [
            pr
            for pr in (config.date_parser.parse(er, reference) for er in ers)
            if pr
        ]
        if len(prs) < 2:
            return result

        pr_begin, pr_end = prs[0], prs[1]
        future_begin, future_end = pr_begin.value.future_value, pr_end.value.future_value
        past_begin, past_end = pr_begin.value.past_value, pr_end.value.past_value
        span_days = (future_end - future_begin).days

        result.sub_date_time_entities = prs
        result.timex = f'({pr_begin.timex_str},{pr_end.timex_str},P{span_days}D)'
        result.future_value = [future_begin, future_end]
        result.past_value = [past_begin, past_end]
        result.success = True

        return result
    def extend_with_week_day_and_year(self, start_index: int, end_index: int, month: int,
                                      day: int, text: str, reference: datetime):
        """Widen a month/day span so it also covers an adjacent year and weekday.

        A year is looked up through ``self.get_year_index`` in the text after
        the span and, when that fails and ``config.check_both_before_after``
        is set, in the text before it. A weekday is looked up before the span
        (``config.week_day_end``) and otherwise after it
        (``config.week_day_start``); it is only absorbed when it maps to the
        same ``config.day_of_week`` value as the weekday of the computed date.

        :param start_index: start offset of the month/day span in ``text``.
        :param end_index: end offset of the month/day span in ``text``.
        :param month: month of the recognized date.
        :param day: day of the recognized date.
        :param text: full text being processed.
        :param reference: reference date; its year is used to build the date.
        :return: ``(start_index, end_index)`` after any widening.
        """
        from .utilities import DateUtils
        import calendar

        # NOTE(review): ``year`` stays ``reference.year`` below; the values
        # unpacked from get_year_index carry no year.
        year = reference.year

        suffix = text[end_index:]
        prefix = text[:start_index]

        # Check whether there's a year after the span
        year_index, success = self.get_year_index(suffix, year, False)
        end_index += year_index

        # Check also in prefix. This used to repeat the suffix lookup, which
        # had just failed, so a leading year could never be picked up.
        # The third argument presumably flags prefix mode -- confirm against
        # get_year_index.
        if not success and self.config.check_both_before_after:
            year_index, success = self.get_year_index(prefix, year, True)
            start_index -= year_index

        date = DateUtils.safe_create_from_value(DateUtils.min_value, year, month, day)

        # Check whether there's a weekday: first before the span, then after
        is_match_in_suffix = False
        match_week_day = self.config.week_day_end.match(prefix)

        if not match_week_day:
            match_week_day = self.config.week_day_start.match(suffix)
            is_match_in_suffix = bool(match_week_day)

        if not match_week_day:
            return start_index, end_index

        # Get weekday from context directly, compare it with the weekday of
        # the computed date to see whether they reference the same weekday
        extracted_week_day_str = RegExpUtility.get_group(
            match_week_day, Constants.WEEKDAY_GROUP_NAME)
        # NOTE(review): calendar.day_name follows the current locale -- confirm
        # the keys of config.day_of_week line up with it.
        num_week_day_str = calendar.day_name[date.weekday()].lower()
        week_day_1 = self.config.day_of_week.get(num_week_day_str)
        week_day_2 = self.config.day_of_week.get(extracted_week_day_str)

        # Presence test, not truthiness: a weekday mapped to 0 is still valid.
        if week_day_1 is None or week_day_2 is None:
            return start_index, end_index

        if date != DateUtils.min_value and week_day_1 == week_day_2:
            if is_match_in_suffix:
                end_index += match_week_day.end()
            else:
                start_index = match_week_day.start()

        return start_index, end_index
 def __init__(self):
     """Compile the English date-time regexes stored on this instance."""
     compile_safe = RegExpUtility.get_safe_reg_exp

     # Relative-time markers.
     self._later_regex = compile_safe(EnglishDateTime.LaterRegex)
     self._ago_regex = compile_safe(EnglishDateTime.AgoRegex)
     self._in_connector_regex = compile_safe(EnglishDateTime.InConnectorRegex)
     self._range_unit_regex = compile_safe(EnglishDateTime.RangeUnitRegex)

     # AM / PM descriptors. The double underscore in ``_pm_desc__regex``
     # is kept as-is because code outside this block may read that name.
     self._am_desc_regex = compile_safe(EnglishDateTime.AmDescRegex)
     self._pm_desc__regex = compile_safe(EnglishDateTime.PmDescRegex)
     self._am_pm_desc_regex = compile_safe(EnglishDateTime.AmPmDescRegex)
 def __init__(self, mode: ChineseNumberExtractorMode = ChineseNumberExtractorMode.DEFAULT):
     """Populate the private regex table with the Chinese integer patterns.

     Six patterns are always registered. One more is appended depending on
     ``mode``: ``NumbersWithAllowListRegex`` for ``DEFAULT`` or
     ``NumbersAggressiveRegex`` for ``EXTRACT_ALL``. Any other mode adds
     nothing.

     :param mode: extraction mode selecting the optional extra pattern.
     """
     def tagged(pattern, tag):
         # Compile one pattern source and attach its type tag.
         return ReVal(re=RegExpUtility.get_safe_reg_exp(pattern), val=tag)

     lang_tag = f'Integer{ChineseNumeric.LangMarker}'

     regexes = [
         tagged(ChineseNumeric.NumbersSpecialsChars, 'IntegerNum'),
         tagged(ChineseNumeric.NumbersSpecialsCharsWithSuffix, 'IntegerNum'),
         tagged(ChineseNumeric.DottedNumbersSpecialsChar, 'IntegerNum'),
         tagged(ChineseNumeric.NumbersWithHalfDozen, lang_tag),
         tagged(ChineseNumeric.NumbersWithDozen, lang_tag),
         tagged(ChineseNumeric.HalfUnitRegex, lang_tag),
     ]

     # Mode-specific pattern goes last.
     if mode == ChineseNumberExtractorMode.DEFAULT:
         regexes.append(
             tagged(ChineseNumeric.NumbersWithAllowListRegex, lang_tag))
     elif mode == ChineseNumberExtractorMode.EXTRACT_ALL:
         regexes.append(
             tagged(ChineseNumeric.NumbersAggressiveRegex, lang_tag))

     self.__regexes = regexes
    def extend_with_week_day_and_year(self, start_index: int, end_index: int,
                                      month: int, day: int, text: str,
                                      reference: datetime):
        """Widen a month/day span so it also covers a trailing year and a leading weekday.

        A year matched by ``config.year_suffix`` at the start of the text
        after the span extends ``end_index`` when it lies within
        ``Constants.MIN_YEAR_NUM``..``Constants.MAX_YEAR_NUM``. A weekday
        matched by ``config.week_day_end`` in the text before the span moves
        ``start_index`` to the start of that match, but only when it maps to
        the same ``config.day_of_week`` value as the weekday of the computed
        date.

        :param start_index: start offset of the month/day span in ``text``.
        :param end_index: end offset of the month/day span in ``text``.
        :param month: month of the recognized date.
        :param day: day of the recognized date.
        :param text: full text being processed.
        :param reference: reference date; supplies the year when none is found.
        :return: ``(start_index, end_index)`` after any widening.
        """
        from .abstract_year_extractor import AbstractYearExtractor
        from .utilities import DateUtils
        import calendar

        year = reference.year

        # Check whether there's a year
        suffix = text[end_index:]
        match_year = self.config.year_suffix.match(suffix)

        if match_year and match_year.start() == 0:
            year = AbstractYearExtractor.get_year_from_text(self, match_year)

            if Constants.MIN_YEAR_NUM <= year <= Constants.MAX_YEAR_NUM:
                end_index += len(match_year.group())

        date = DateUtils.safe_create_from_value(DateUtils.min_value, year,
                                                month, day)

        # Check whether there's a weekday
        prefix = text[:start_index]
        match_week_day = self.config.week_day_end.match(prefix)

        if not match_week_day:
            return start_index, end_index

        # Get weekday from context directly, compare it with the weekday of
        # the computed date to see whether they reference the same weekday
        extracted_week_day_str = RegExpUtility.get_group(
            match_week_day, Constants.WEEKDAY_GROUP_NAME)
        # NOTE(review): calendar.day_name follows the current locale -- confirm
        # the keys of config.day_of_week line up with it.
        num_week_day_str = calendar.day_name[date.weekday()].lower()
        week_day_1 = self.config.day_of_week.get(num_week_day_str)
        week_day_2 = self.config.day_of_week.get(extracted_week_day_str)

        # Presence test, not truthiness: a weekday mapped to 0 is still valid.
        if week_day_1 is None or week_day_2 is None:
            return start_index, end_index

        if date != DateUtils.min_value and week_day_1 == week_day_2:
            # Move the start back to where the weekday begins. Using the
            # match end here could never move the start left to take in the
            # weekday.
            start_index = match_week_day.start()

        return start_index, end_index
Exemple #29
0
    def __init__(self, config):
        """Set up the Spanish holiday regexes, name tables and prefix regexes.

        :param config: accepted but not read by this initializer.
        """
        super().__init__()
        compile_safe = RegExpUtility.get_safe_reg_exp

        holiday_patterns = (
            SpanishDateTime.HolidayRegex1,
            SpanishDateTime.HolidayRegex2,
            SpanishDateTime.HolidayRegex3,
        )
        self._holiday_regexes = [compile_safe(pattern) for pattern in holiday_patterns]

        # Lookup tables taken straight from the Spanish resource definitions.
        self._holiday_names = SpanishDateTime.HolidayNames
        self._variable_holidays_timex_dictionary = SpanishDateTime.VariableHolidaysTimexDictionary

        # Next / past / this prefix regexes.
        self.next_prefix_regex = compile_safe(SpanishDateTime.NextPrefixRegex)
        self.past_prefix_regex = compile_safe(SpanishDateTime.PastPrefixRegex)
        self.this_prefix_regex = compile_safe(SpanishDateTime.ThisPrefixRegex)
Exemple #30
0
    def parse_each_unit(self, source: str) -> DateTimeResolutionResult:
        """Resolve a text that is entirely an "each <unit>" expression.

        The text must be matched over its full length by
        ``config.each_unit_regex``, its ``unit`` group must be a key of
        ``config.unit_map``, and ``config.get_matched_unit_timex`` must
        report a match for that unit.

        :param source: text to resolve.
        :return: a ``DateTimeResolutionResult``; on success its timex is set
            and both values are ``'Set: '`` followed by that timex. It is
            returned untouched when any of the checks fails.
        """
        result = DateTimeResolutionResult()

        match = self.config.each_unit_regex.search(source)
        # The match has to span the whole input.
        if not match or (match.end() - match.start()) != len(source):
            return result

        source_unit = RegExpUtility.get_group(match, 'unit')
        if not source_unit or source_unit not in self.config.unit_map:
            return result

        unit_timex = self.config.get_matched_unit_timex(source_unit)
        if not unit_timex.matched:
            return result

        result.timex = unit_timex.timex
        set_value = 'Set: ' + result.timex
        result.past_value = set_value
        result.future_value = set_value
        result.success = True
        return result