Beispiel #1
0
 def __init__(self):
     """Fill the private regex table with the ChineseNumeric double patterns.

     Every pattern definition is passed through
     ``RegExpUtility.get_safe_reg_exp`` and wrapped in a ``ReVal`` whose
     ``val`` is the tag listed next to it.
     """
     to_regex = RegExpUtility.get_safe_reg_exp
     # (pattern definition, tag) pairs, kept in their original order.
     tagged_patterns = (
         (ChineseNumeric.DoubleSpecialsChars, 'DoubleNum'),
         (ChineseNumeric.DoubleSpecialsCharsWithNegatives, 'DoubleNum'),
         (ChineseNumeric.SimpleDoubleSpecialsChars, 'DoubleNum'),
         (ChineseNumeric.DoubleWithMultiplierRegex, 'DoubleNum'),
         (ChineseNumeric.DoubleWithThousandsRegex,
          f'Double{ChineseNumeric.LangMarker}'),
         (ChineseNumeric.DoubleAllFloatRegex,
          f'Double{ChineseNumeric.LangMarker}'),
         (ChineseNumeric.DoubleExponentialNotationRegex, 'DoublePow'),
         (ChineseNumeric.DoubleScientificNotationRegex, 'DoublePow'),
     )
     self.__regexes = [
         ReVal(re=to_regex(definition), val=tag)
         for definition, tag in tagged_patterns
     ]
 def __init__(self, config: BaseDateParserConfiguration):
     """Copy shared components from ``config`` and prepare FrenchDateTime regexes.

     Args:
         config: Supplies ``time_extractor``, ``time_parser``,
             ``integer_extractor``, ``numbers`` and
             ``utility_configuration``.
     """
     build = RegExpUtility.get_safe_reg_exp

     # Collaborators taken as-is from the supplied configuration.
     self._time_extractor = config.time_extractor
     self._time_parser = config.time_parser
     self._integer_extractor = config.integer_extractor
     self._numbers = config.numbers
     self._utility_configuration = config.utility_configuration

     # Regexes built from the FrenchDateTime definitions.
     self._pure_number_from_to_regex = build(FrenchDateTime.PureNumFromTo)
     self._pure_number_between_and_regex = build(
         FrenchDateTime.PureNumBetweenAnd)
     self._time_of_day_regex = build(FrenchDateTime.TimeOfDayRegex)
     self._till_regex = build(FrenchDateTime.TillRegex)
Beispiel #3
0
    def __init__(self):
        """Prepare the SpanishDateTime regexes and the Spanish sub-extractors.

        Regexes are built with ``RegExpUtility.get_safe_reg_exp``; each
        sub-extractor is a ``Base*Extractor`` constructed with its Spanish
        configuration object, plus a ``SpanishCardinalExtractor``.
        """
        build = RegExpUtility.get_safe_reg_exp

        # Regexes built from the SpanishDateTime definitions.
        self._last_regex = build(SpanishDateTime.LastDateRegex)
        self._periodic_regex = build(SpanishDateTime.PeriodicRegex)
        self._each_unit_regex = build(SpanishDateTime.EachUnitRegex)
        self._each_prefix_regex = build(SpanishDateTime.EachPrefixRegex)
        self._each_day_regex = build(SpanishDateTime.EachDayRegex)
        self._before_each_day_regex = build(SpanishDateTime.BeforeEachDayRegex)
        self._set_each_regex = build(SpanishDateTime.SetEachRegex)
        self._set_week_day_regex = build(SpanishDateTime.SetWeekDayRegex)

        # Sub-extractors, each wired to its Spanish configuration.
        duration_config = SpanishDurationExtractorConfiguration()
        self._duration_extractor = BaseDurationExtractor(duration_config)
        time_config = SpanishTimeExtractorConfiguration()
        self._time_extractor = BaseTimeExtractor(time_config)
        date_config = SpanishDateExtractorConfiguration()
        self._date_extractor = BaseDateExtractor(date_config)
        date_time_config = SpanishDateTimeExtractorConfiguration()
        self._date_time_extractor = BaseDateTimeExtractor(date_time_config)
        date_period_config = SpanishDatePeriodExtractorConfiguration()
        self._date_period_extractor = BaseDatePeriodExtractor(
            date_period_config)
        time_period_config = SpanishTimePeriodExtractorConfiguration()
        self._time_period_extractor = BaseTimePeriodExtractor(
            time_period_config)
        date_time_period_config = SpanishDateTimePeriodExtractorConfiguration()
        self._date_time_period_extractor = BaseDateTimePeriodExtractor(
            date_time_period_config)
        self._cardinal_extractor = SpanishCardinalExtractor()
Beispiel #4
0
    def __init__(self, config: BaseDateParserConfiguration):
        """Set up SpanishDateTime time-parsing resources and copy parts of ``config``.

        Args:
            config: Supplies ``utility_configuration``, ``numbers`` and
                ``time_zone_parser``.
        """
        build = RegExpUtility.get_safe_reg_exp

        # Resources taken from the Spanish definitions.
        self._time_token_prefix: str = SpanishDateTime.TimeTokenPrefix
        self._at_regex: Pattern = build(SpanishDateTime.AtRegex)
        self._time_regexes: List[Pattern] = (
            SpanishTimeExtractorConfiguration.get_time_regex_list())
        self.less_than_one_hour = build(SpanishDateTime.LessThanOneHour)
        self.time_suffix = build(SpanishDateTime.TimeSuffix)

        # Values copied from the supplied configuration.
        self._utility_configuration = config.utility_configuration
        self._numbers: Dict[str, int] = config.numbers
        self._time_zone_parser = config.time_zone_parser
Beispiel #5
0
 def __init__(self):
     """Prepare the EnglishDateTime regexes and the helper extractors.

     ``_simple_cases_regex`` holds the regexes built from ``PureNumFromTo``
     and ``PureNumBetweenAnd``, in that order.
     """
     build = RegExpUtility.get_safe_reg_exp

     self._simple_cases_regex: List[Pattern] = [
         build(definition)
         for definition in (EnglishDateTime.PureNumFromTo,
                            EnglishDateTime.PureNumBetweenAnd)
     ]
     self._till_regex: Pattern = build(EnglishDateTime.TillRegex)
     self._time_of_day_regex: Pattern = build(EnglishDateTime.TimeOfDayRegex)
     self._general_ending_regex: Pattern = build(
         EnglishDateTime.GeneralEndingRegex)

     # Helper extractors.
     time_config = EnglishTimeExtractorConfiguration()
     self._single_time_extractor = BaseTimeExtractor(time_config)
     self._integer_extractor = EnglishIntegerExtractor()
Beispiel #6
0
 def __init__(self, placeholder):
     """Build the private regex table for the PortugueseNumeric double patterns.

     Every entry is a ``ReVal`` pairing a regex obtained from
     ``RegExpUtility.get_safe_reg_exp`` with a tag string stored as ``val``.

     Args:
         placeholder: Forwarded to the placeholder-dependent pattern
             builders (``DoubleDecimalPointRegex``,
             ``DoubleWithoutIntegralRegex``) and to
             ``self._generate_format_regex``.
     """
     self.__regexes = [
         ReVal(re=RegExpUtility.get_safe_reg_exp(
             PortugueseNumeric.DoubleDecimalPointRegex(placeholder)),
               val='DoubleNum'),
         ReVal(re=RegExpUtility.get_safe_reg_exp(
             PortugueseNumeric.DoubleWithoutIntegralRegex(placeholder)),
               val='DoubleNum'),
         # This entry used to store the raw definition; it now goes through
         # get_safe_reg_exp like every other entry in this table so that
         # all ``re`` members have the same type and preparation.
         ReVal(re=RegExpUtility.get_safe_reg_exp(
             PortugueseNumeric.DoubleWithMultiplierRegex),
               val='DoubleNum'),
         ReVal(re=RegExpUtility.get_safe_reg_exp(
             PortugueseNumeric.DoubleWithRoundNumber),
               val='DoubleNum'),
         ReVal(re=RegExpUtility.get_safe_reg_exp(
             PortugueseNumeric.DoubleAllFloatRegex),
               val='DoublePor'),
         ReVal(re=RegExpUtility.get_safe_reg_exp(
             PortugueseNumeric.DoubleExponentialNotationRegex),
               val='DoublePow'),
         ReVal(re=RegExpUtility.get_safe_reg_exp(
             PortugueseNumeric.DoubleCaretExponentialNotationRegex),
               val='DoublePow'),
         # Long-format patterns generated for two separator conventions.
         ReVal(re=RegExpUtility.get_safe_reg_exp(
             self._generate_format_regex(LongFormatMode.DOUBLE_DOT_COMMA,
                                         placeholder)),
               val='DoubleNum'),
         ReVal(re=RegExpUtility.get_safe_reg_exp(
             self._generate_format_regex(
                 LongFormatMode.DOUBLE_NO_BREAK_SPACE_COMMA, placeholder)),
               val='DoubleNum')
     ]
Beispiel #7
0
 def __init__(self, culture_info: CultureInfo):
     """Initialise the base class and the ChineseNumericWithUnit resources.

     Args:
         culture_info: Passed to the base initializer; when it is ``None``
             a ``CultureInfo(Culture.Chinese)`` is used instead.
     """
     super().__init__(
         CultureInfo(Culture.Chinese) if culture_info is None
         else culture_info)

     build = RegExpUtility.get_safe_reg_exp
     extraction_mode = ChineseNumberExtractorMode.EXTRACT_ALL
     self._unit_num_extractor = ChineseNumberExtractor(extraction_mode)
     self._build_prefix = ChineseNumericWithUnit.BuildPrefix
     self._build_suffix = ChineseNumericWithUnit.BuildSuffix
     self._compound_unit_connector_regex = build(
         ChineseNumericWithUnit.CompoundUnitConnectorRegex)
     # This one comes from BaseUnits rather than ChineseNumericWithUnit.
     self._pm_non_unit_regex = build(BaseUnits.PmNonUnitRegex)
     self._half_unit_regex = build(ChineseNumericWithUnit.HalfUnitRegex)
Beispiel #8
0
 def __init__(self):
     """Fill the private regex table with the ChineseNumeric fraction patterns.

     Each definition goes through ``RegExpUtility.get_safe_reg_exp`` and is
     stored in a ``ReVal`` together with its tag.
     """
     to_regex = RegExpUtility.get_safe_reg_exp
     # (pattern definition, tag) pairs, in their original order.
     tagged_patterns = (
         (ChineseNumeric.FractionNotationSpecialsCharsRegex, 'FracNum'),
         (ChineseNumeric.FractionNotationRegex, 'FracNum'),
         (ChineseNumeric.AllFractionNumber,
          f'Frac{ChineseNumeric.LangMarker}'),
     )
     self.__regexes = [
         ReVal(re=to_regex(definition), val=tag)
         for definition, tag in tagged_patterns
     ]
Beispiel #9
0
 def __init__(self):
     """Fill the private regex table with the JapaneseNumeric fraction patterns.

     Each definition goes through ``RegExpUtility.get_safe_reg_exp`` and is
     stored in a ``ReVal`` together with its tag.
     """
     to_regex = RegExpUtility.get_safe_reg_exp
     tagged_patterns = (
         # e.g. -4 5/2,  4 6/3
         (JapaneseNumeric.FractionNotationSpecialsCharsRegex, 'FracNum'),
         # e.g. 8/3
         (JapaneseNumeric.FractionNotationRegex, 'FracNum'),
         # e.g. 五分の二 七分の三
         (JapaneseNumeric.AllFractionNumber, 'FracJpn'),
     )
     self.__regexes = [
         ReVal(re=to_regex(definition), val=tag)
         for definition, tag in tagged_patterns
     ]
Beispiel #10
0
 def __init__(self):
     """Fill the private regex table with the JapaneseNumeric ordinal patterns.

     All three entries carry the same ``'OrdinalJpn'`` tag.
     """
     to_regex = RegExpUtility.get_safe_reg_exp
     ordinal_definitions = (
         # e.g. だい一百五十四
         JapaneseNumeric.OrdinalRegex,
         # e.g. だい2565
         JapaneseNumeric.OrdinalNumbersRegex,
         # e.g. 2折 2.5折
         JapaneseNumeric.NumbersFoldsPercentageRegex,
     )
     self.__regexes = [
         ReVal(re=to_regex(definition), val='OrdinalJpn')
         for definition in ordinal_definitions
     ]
 def __init__(self):
     """Build ``_regexes`` from the BasePhoneNumbers patterns.

     Each pattern is passed through ``RegExpUtility.get_safe_reg_exp`` and
     paired, positionally, with its ``Constants.PHONE_NUMBER_REGEX_*`` tag.
     """
     to_regex = RegExpUtility.get_safe_reg_exp
     # (pattern definition, tag) pairs, in their original order.
     tagged_patterns = (
         (BasePhoneNumbers.BRPhoneNumberRegex,
          Constants.PHONE_NUMBER_REGEX_BR),
         (BasePhoneNumbers.GeneralPhoneNumberRegex,
          Constants.PHONE_NUMBER_REGEX_GENERAL),
         (BasePhoneNumbers.UKPhoneNumberRegex,
          Constants.PHONE_NUMBER_REGEX_UK),
         (BasePhoneNumbers.DEPhoneNumberRegex,
          Constants.PHONE_NUMBER_REGEX_DE),
         (BasePhoneNumbers.USPhoneNumberRegex,
          Constants.PHONE_NUMBER_REGEX_US),
         (BasePhoneNumbers.CNPhoneNumberRegex,
          Constants.PHONE_NUMBER_REGEX_CN),
         (BasePhoneNumbers.DKPhoneNumberRegex,
          Constants.PHONE_NUMBER_REGEX_DK),
         (BasePhoneNumbers.ITPhoneNumberRegex,
          Constants.PHONE_NUMBER_REGEX_IT),
         (BasePhoneNumbers.NLPhoneNumberRegex,
          Constants.PHONE_NUMBER_REGEX_NL),
         (BasePhoneNumbers.SpecialPhoneNumberRegex,
          Constants.PHONE_NUMBER_REGEX_SPECIAL),
     )
     self._regexes = [
         ReVal(to_regex(definition), tag)
         for definition, tag in tagged_patterns
     ]
Beispiel #12
0
    def __init__(self, mode: ChineseNumberExtractorMode = ChineseNumberExtractorMode.DEFAULT):
        """Merge cardinal and fraction regexes and build the ambiguity filters.

        Args:
            mode: Forwarded to ``ChineseCardinalExtractor``; also decides
                whether the ambiguity filters are populated.
        """
        to_regex = RegExpUtility.get_safe_reg_exp

        # Cardinal regexes first, fraction regexes after them.
        self.__regexes: List[ReVal] = [
            *ChineseCardinalExtractor(mode).regexes,
            *ChineseFractionExtractor().regexes,
        ]

        # NOTE(review): ``mode`` is annotated as ChineseNumberExtractorMode
        # but compared against NumberMode.Unit - confirm this is intended.
        if mode != NumberMode.Unit:
            filters: List[ReRe] = [
                ReRe(reKey=to_regex(key_definition),
                     reVal=to_regex(value_definition))
                for key_definition, value_definition
                in ChineseNumeric.AmbiguityFiltersDict.items()
            ]
        else:
            filters = list()
        # Despite the attribute name, this is a list of ReRe pairs.
        self.__ambiguity_filters_dict = filters
Beispiel #13
0
 def __init__(self):
     """Build ``_regexes`` with the single BaseEmail.EmailRegex entry."""
     email_pattern = RegExpUtility.get_safe_reg_exp(BaseEmail.EmailRegex)
     # BaseEmail.EmailRegex2 is deliberately not registered: per the
     # original note it is not supported in Python and would break the code.
     self._regexes = [ReVal(email_pattern, Constants.EMAIL_REGEX)]
    def __init__(self, config):
        """Store ``config``, initialise the TLD matcher and build the URL regexes.

        Args:
            config: Supplies ``ip_url_regex`` and ``url_regex``; also kept
                on the instance as ``self.config``.
        """
        self.config = config

        # The matcher is created here and then initialised through the
        # ``tld_matcher()`` accessor with BaseURL.TldList.
        self._tld_matcher = StringMatcher()
        self.tld_matcher().init(BaseURL.TldList)

        # All three entries share the Constants.URL_REGEX tag.
        self._regexes = [
            ReVal(pattern, Constants.URL_REGEX)
            for pattern in (
                config.ip_url_regex,
                config.url_regex,
                RegExpUtility.get_safe_reg_exp(BaseURL.UrlRegex2),
            )
        ]

        ambiguous_pattern = RegExpUtility.get_safe_reg_exp(
            BaseURL.AmbiguousTimeTerm)
        self._ambiguous_time_term = ReVal(ambiguous_pattern,
                                          Constants.URL_REGEX)
Beispiel #15
0
 def __build_regex_from_str(self, source: str,
                            ignore_case: bool) -> Pattern:
     """Turn a ``|``-separated list of literals into a regex.

     Each token of ``source`` is escaped with ``regex.escape``, the tokens
     are re-joined as alternatives inside one group, and the group is
     surrounded by ``self.config.build_prefix`` / ``build_suffix``.

     Args:
         source: Literal alternatives separated by ``|``.
         ignore_case: When true, ``regex.I`` is added to ``regex.S``.

     Returns:
         The result of ``RegExpUtility.get_safe_reg_exp`` for the assembled
         definition and flags.
     """
     alternatives = '|'.join(
         regex.escape(token) for token in source.split('|'))
     assembled = f'{self.config.build_prefix}({alternatives}){self.config.build_suffix}'
     if ignore_case:
         flags = regex.S + regex.I
     else:
         flags = regex.S
     return RegExpUtility.get_safe_reg_exp(assembled, flags)
 def __init__(self, culture_info: CultureInfo = None):
     """Initialise the base class and load the Chinese temperature unit tables.

     Args:
         culture_info: Passed unchanged to the base initializer.
     """
     super().__init__(culture_info)

     # Unit tables taken from ChineseNumericWithUnit.
     units = ChineseNumericWithUnit
     self._suffix_list = units.TemperatureSuffixList
     self._prefix_list = units.TemperaturePrefixList
     self._ambiguous_unit_list = units.TemperatureAmbiguousValues

     # Regex built from the shared BaseUnits definition.
     multiplier_definition = BaseUnits.AmbiguousUnitNumberMultiplierRegex
     self._ambiguous_unit_number_multiplier_regex = (
         RegExpUtility.get_safe_reg_exp(multiplier_definition))
Beispiel #17
0
 def __init__(self):
     """Create the Chinese extractors and parsers and prepare the set regexes.

     Also stores ``ChineseDateTime.ParserConfigurationUnitMap`` as
     ``_unit_map``.
     """
     # Extractors.
     self._date_extractor = ChineseDateExtractor()
     self._time_extractor = ChineseTimeExtractor()
     self._duration_extractor = ChineseDurationExtractor()
     self._date_time_extractor = ChineseDateTimeExtractor()

     # Parsers.
     self._date_parser = ChineseDateParser()
     self._time_parser = ChineseTimeParser()
     self._duration_parser = ChineseDurationParser()
     self._date_time_parser = ChineseDateTimeParser()

     self._unit_map = ChineseDateTime.ParserConfigurationUnitMap

     # Regexes built from the ChineseDateTime "SetEach*" definitions.
     build = RegExpUtility.get_safe_reg_exp
     self._each_unit_regex = build(ChineseDateTime.SetEachUnitRegex)
     self._each_day_regex = build(ChineseDateTime.SetEachDayRegex)
     self._each_prefix_regex = build(ChineseDateTime.SetEachPrefixRegex)
 def __init__(self, culture_info: CultureInfo):
     """Initialise the base class and the EnglishNumericWithUnit resources.

     Args:
         culture_info: Passed to the base initializer; when it is ``None``
             a ``CultureInfo(Culture.English)`` is used instead.
     """
     super().__init__(
         CultureInfo(Culture.English) if culture_info is None
         else culture_info)

     self._unit_num_extractor = EnglishNumberExtractor()
     self._build_prefix = EnglishNumericWithUnit.BuildPrefix
     self._build_suffix = EnglishNumericWithUnit.BuildSuffix
     connector_definition = EnglishNumericWithUnit.CompoundUnitConnectorRegex
     self._compound_unit_connector_regex = RegExpUtility.get_safe_reg_exp(
         connector_definition)
    def __init__(self, mode: NumberMode = NumberMode.DEFAULT):
        """Collect the French cardinal and fraction regexes for ``mode``.

        Args:
            mode: ``NumberMode.PURE_NUMBER`` builds the cardinal extractor
                with ``FrenchNumeric.PlaceHolderPureNumber``;
                ``NumberMode.CURRENCY`` first registers
                ``FrenchNumeric.CurrencyRegex`` tagged ``'IntegerNum'``;
                every other mode uses the default cardinal extractor.
        """
        build = RegExpUtility.get_safe_reg_exp

        self.__negative_number_terms = build(
            FrenchNumeric.NegativeNumberTermsRegex)
        self.__regexes: List[ReVal] = list()

        if mode is NumberMode.PURE_NUMBER:
            cardinal_ex = FrenchCardinalExtractor(
                FrenchNumeric.PlaceHolderPureNumber)
        else:
            if mode is NumberMode.CURRENCY:
                # The currency pattern goes ahead of the cardinal ones.
                currency_entry = ReVal(re=build(FrenchNumeric.CurrencyRegex),
                                       val='IntegerNum')
                self.__regexes.append(currency_entry)
            cardinal_ex = FrenchCardinalExtractor()

        # Cardinal regexes first, fraction regexes after them.
        self.__regexes.extend(cardinal_ex.regexes)
        self.__regexes.extend(FrenchFractionExtractor().regexes)
    def __init__(self, config):
        """Run the French common initializer, then add regexes and sub-parsers.

        Args:
            config: Handed to the set and holiday parser configurations.
                The date-period and time-period parser configurations
                receive this instance instead.
        """
        FrenchCommonDateTimeParserConfiguration.__init__(self)

        build = RegExpUtility.get_safe_reg_exp
        self._before_regex = build(FrenchDateTime.BeforeRegex)
        self._after_regex = build(FrenchDateTime.AfterRegex)
        self._since_regex = build(FrenchDateTime.SinceRegex)

        # Period parsers are configured from this instance.
        date_period_config = FrenchDatePeriodParserConfiguration(self)
        self._date_period_parser = BaseDatePeriodParser(date_period_config)
        time_period_config = FrenchTimePeriodParserConfiguration(self)
        self._time_period_parser = BaseTimePeriodParser(time_period_config)

        # Set and holiday parsers are configured from the incoming config.
        set_config = FrenchSetParserConfiguration(config)
        self._set_parser = BaseSetParser(set_config)
        holiday_config = FrenchHolidayParserConfiguration(config)
        self._holiday_parser = BaseHolidayParser(holiday_config)
 def __init__(self):
     """Prepare the EnglishDateTime duration regexes and the cardinal extractor."""
     build = RegExpUtility.get_safe_reg_exp

     self._all_regex: Pattern = build(EnglishDateTime.AllRegex)
     self._half_regex: Pattern = build(EnglishDateTime.HalfRegex)
     self._followed_unit: Pattern = build(
         EnglishDateTime.DurationFollowedUnit)
     self._number_combined_with_unit: Pattern = build(
         EnglishDateTime.NumberCombinedWithDurationUnit)
     self._an_unit_regex: Pattern = build(EnglishDateTime.AnUnitRegex)
     self._inexact_number_unit_regex: Pattern = build(
         EnglishDateTime.InexactNumberUnitRegex)
     self._suffix_and_regex: Pattern = build(EnglishDateTime.SuffixAndRegex)
     self._relative_duration_unit_regex: Pattern = build(
         EnglishDateTime.RelativeDurationUnitRegex)

     self._cardinal_extractor: BaseNumberExtractor = (
         EnglishCardinalExtractor())
 def __init__(self, config):
     """Prepare the English regexes and collect the sub-parsers.

     Args:
         config: Supplies the date, time, date-time, period and duration
             parsers, and is handed to the holiday and set parser
             configurations.
     """
     build = RegExpUtility.get_safe_reg_exp

     # Single-underscore regex attributes.
     self._equal_regex = build(BaseDateTime.EqualRegex)
     self._suffix_after = build(EnglishDateTime.SuffixAfterRegex)
     self._year_regex = build(EnglishDateTime.YearRegex)
     self._around_regex = build(EnglishDateTime.AroundRegex)

     # Double-underscore regex attributes.
     self.__before_regex = build(EnglishDateTime.BeforeRegex)
     self.__after_regex = build(EnglishDateTime.AfterRegex)
     self.__since_regex = build(EnglishDateTime.SinceRegex)

     holiday_config = EnglishHolidayParserConfiguration(config)
     self.__holiday_parser = BaseHolidayParser(holiday_config)

     # Parsers copied directly from the supplied configuration.
     self.__date_parser = config.date_parser
     self.__time_parser = config.time_parser
     self.__date_time_parser = config.date_time_parser
     self.__date_period_parser = config.date_period_parser
     self.__time_period_parser = config.time_period_parser
     self.__date_time_period_parser = config.date_time_period_parser
     self.__duration_parser = config.duration_parser

     set_config = EnglishSetParserConfiguration(config)
     self.__set_parser = BaseSetParser(set_config)
    def __init__(self, config):
        """Run the Spanish common initializer, then add regexes and sub-parsers.

        Args:
            config: Supplies ``time_zone_parser`` and is handed to the set
                and holiday parser configurations. The period parser
                configurations receive this instance instead.
        """
        SpanishCommonDateTimeParserConfiguration.__init__(self)
        self._time_zone_parser = config.time_zone_parser

        build = RegExpUtility.get_safe_reg_exp
        # The equality regex comes from BaseDateTime, the rest from
        # SpanishDateTime.
        self._equal_regex = build(BaseDateTime.EqualRegex)
        self._suffix_after = build(SpanishDateTime.SuffixAfterRegex)
        self._year_regex = build(SpanishDateTime.YearRegex)
        self._around_regex = build(SpanishDateTime.AroundRegex)
        self._before_regex = build(SpanishDateTime.BeforeRegex)
        self._after_regex = build(SpanishDateTime.AfterRegex)
        self._since_regex = build(SpanishDateTime.SinceRegex)

        # Period parsers are configured from this instance.
        date_period_config = SpanishDatePeriodParserConfiguration(self)
        self._date_period_parser = BaseDatePeriodParser(date_period_config)
        time_period_config = SpanishTimePeriodParserConfiguration(self)
        self._time_period_parser = BaseTimePeriodParser(time_period_config)
        date_time_period_config = SpanishDateTimePeriodParserConfiguration(
            self)
        self._date_time_period_parser = SpanishDateTimePeriodParser(
            date_time_period_config)

        # Set and holiday parsers are configured from the incoming config.
        set_config = SpanishSetParserConfiguration(config)
        self._set_parser = BaseSetParser(set_config)
        holiday_config = SpanishHolidayParserConfiguration(config)
        self._holiday_parser = BaseHolidayParser(holiday_config)
Beispiel #24
0
    def __init__(self):
        """Prepare the GermanDateTime regexes and the German sub-extractors.

        ``_before_each_day_regex`` is explicitly set to ``None`` here; every
        other regex is built with ``RegExpUtility.get_safe_reg_exp``.
        """
        build = RegExpUtility.get_safe_reg_exp

        # Regexes built from the GermanDateTime definitions.
        self._last_regex = build(GermanDateTime.SetLastRegex)
        self._periodic_regex = build(GermanDateTime.PeriodicRegex)
        self._each_unit_regex = build(GermanDateTime.EachUnitRegex)
        self._each_prefix_regex = build(GermanDateTime.EachPrefixRegex)
        self._each_day_regex = build(GermanDateTime.EachDayRegex)
        self._before_each_day_regex = None
        self._set_each_regex = build(GermanDateTime.SetEachRegex)
        self._set_week_day_regex = build(GermanDateTime.SetWeekDayRegex)

        # Sub-extractors, each wired to its German configuration.
        duration_config = GermanDurationExtractorConfiguration()
        self._duration_extractor = BaseDurationExtractor(duration_config)
        time_config = GermanTimeExtractorConfiguration()
        self._time_extractor = BaseTimeExtractor(time_config)
        date_config = GermanDateExtractorConfiguration()
        self._date_extractor = BaseDateExtractor(date_config)
        date_time_config = GermanDateTimeExtractorConfiguration()
        self._date_time_extractor = BaseDateTimeExtractor(date_time_config)
        date_period_config = GermanDatePeriodExtractorConfiguration()
        self._date_period_extractor = BaseDatePeriodExtractor(
            date_period_config)
        time_period_config = GermanTimePeriodExtractorConfiguration()
        self._time_period_extractor = BaseTimePeriodExtractor(
            time_period_config)
        date_time_period_config = GermanDateTimePeriodExtractorConfiguration()
        self._date_time_period_extractor = BaseDateTimePeriodExtractor(
            date_time_period_config)
Beispiel #25
0
    def __init__(self, culture_info=None):
        """Load the JapaneseNumeric tokens, lookup tables and regexes.

        Args:
            culture_info: Stored as ``_culture_info``; when ``None`` a
                ``CultureInfo(Culture.Japanese)`` is created instead.
        """
        build = RegExpUtility.get_safe_reg_exp

        self._culture_info = (
            CultureInfo(Culture.Japanese) if culture_info is None
            else culture_info)

        # Tokens and markers.
        self._lang_marker = JapaneseNumeric.LangMarker
        self._decimal_separator_char = JapaneseNumeric.DecimalSeparatorChar
        self._fraction_marker_token = JapaneseNumeric.FractionMarkerToken
        self._non_decimal_separator_char = (
            JapaneseNumeric.NonDecimalSeparatorChar)
        self._half_a_dozen_text = JapaneseNumeric.HalfADozenText
        self._word_separator_token = JapaneseNumeric.WordSeparatorToken

        self._round_number_map = JapaneseNumeric.RoundNumberMap
        self._digital_number_regex = build(JapaneseNumeric.DigitalNumberRegex)

        # Lookup tables.
        self._zero_to_nine_map = JapaneseNumeric.ZeroToNineMap
        self._round_number_map_char = JapaneseNumeric.RoundNumberMapChar
        self._full_to_half_map = JapaneseNumeric.FullToHalfMap
        self._unit_map = JapaneseNumeric.UnitMap
        self._round_direct_list = JapaneseNumeric.RoundDirectList

        # These four are stored exactly as JapaneseNumeric provides them,
        # without going through get_safe_reg_exp.
        self._digit_num_regex = JapaneseNumeric.DigitNumRegex
        self._dozen_regex = JapaneseNumeric.DozenRegex
        self._percentage_regex = JapaneseNumeric.PercentageRegex
        self._double_and_round_regex = build(
            JapaneseNumeric.DoubleAndRoundRegex)
        self._frac_split_regex = build(JapaneseNumeric.FracSplitRegex)
        self._negative_number_sign_regex = build(
            JapaneseNumeric.NegativeNumberSignRegex)
        self._point_regex = JapaneseNumeric.PointRegex
        self._spe_get_number_regex = build(JapaneseNumeric.SpeGetNumberRegex)
        self._pair_regex = build(JapaneseNumeric.PairRegex)
        self._round_number_integer_regex = build(
            JapaneseNumeric.RoundNumberIntegerRegex)
Beispiel #26
0
 def __init__(self):
     """Prepare the EnglishDateTime time-period regexes and helper extractors.

     Runs the base initializer first, then builds the regexes with
     ``RegExpUtility.get_safe_reg_exp`` and creates the single-time,
     integer and time-zone extractors. ``_options`` is set to
     ``DateTimeOptions.NONE``.
     """
     super().__init__()
     self._check_both_before_after = EnglishDateTime.CheckBothBeforeAfter
     self._simple_cases_regex: List[Pattern] = [
         RegExpUtility.get_safe_reg_exp(EnglishDateTime.PureNumFromTo),
         RegExpUtility.get_safe_reg_exp(EnglishDateTime.PureNumBetweenAnd),
         RegExpUtility.get_safe_reg_exp(EnglishDateTime.SpecificTimeFromTo),
         RegExpUtility.get_safe_reg_exp(
             EnglishDateTime.SpecificTimeBetweenAnd)
     ]
     self._till_regex: Pattern = RegExpUtility.get_safe_reg_exp(
         EnglishDateTime.TillRegex)
     self._time_of_day_regex: Pattern = RegExpUtility.get_safe_reg_exp(
         EnglishDateTime.TimeOfDayRegex)
     self._general_ending_regex: Pattern = RegExpUtility.get_safe_reg_exp(
         EnglishDateTime.GeneralEndingRegex)
     self._single_time_extractor = BaseTimeExtractor(
         EnglishTimeExtractorConfiguration())
     self._integer_extractor = EnglishIntegerExtractor()
     self._time_zone_extractor = BaseTimeZoneExtractor(
         EnglishTimeZoneExtractorConfiguration())
     self._token_before_date = EnglishDateTime.TokenBeforeDate
     # The second entry used to repeat PureNumFromTo, which left the
     # "between ... and" pure-number form out of this list; it now matches
     # the two pure-number patterns at the head of _simple_cases_regex.
     # The definitions are stored as provided (not passed through
     # get_safe_reg_exp), as before.
     self._pure_number_regex = [
         EnglishDateTime.PureNumFromTo, EnglishDateTime.PureNumBetweenAnd
     ]
     self._options = DateTimeOptions.NONE
Beispiel #27
0
 def __init__(self):
     """Fill the private regex table with the JapaneseNumeric double patterns.

     Each definition goes through ``RegExpUtility.get_safe_reg_exp`` and is
     stored in a ``ReVal`` together with its tag.
     """
     to_regex = RegExpUtility.get_safe_reg_exp
     tagged_patterns = (
         (JapaneseNumeric.DoubleSpecialsChars, 'DoubleNum'),
         # e.g. (-)2.5, can avoid cases like ip address xx.xx.xx.xx
         (JapaneseNumeric.DoubleSpecialsCharsWithNegatives, 'DoubleNum'),
         # e.g. (-).2
         (JapaneseNumeric.SimpleDoubleSpecialsChars, 'DoubleNum'),
         # e.g. 1.0 K
         (JapaneseNumeric.DoubleWithMultiplierRegex, 'DoubleNum'),
         # e.g. 15.2万
         (JapaneseNumeric.DoubleWithThousandsRegex, 'DoubleJpn'),
         # e.g. 2e6, 21.2e0
         (JapaneseNumeric.DoubleExponentialNotationRegex, 'DoublePow'),
         # e.g. 2^5
         (JapaneseNumeric.DoubleScientificNotationRegex, 'DoublePow'),
     )
     self.__regexes = [
         ReVal(re=to_regex(definition), val=tag)
         for definition, tag in tagged_patterns
     ]
Beispiel #28
0
 def __init__(self, mode: ChineseNumberExtractorMode = ChineseNumberExtractorMode.DEFAULT):
     """Fill the private regex table with the ChineseNumeric integer patterns.

     Args:
         mode: ``DEFAULT`` appends ``NumbersWithAllowListRegex``;
             ``EXTRACT_ALL`` appends ``NumbersAggressiveRegex``; any other
             value appends nothing.
     """
     to_regex = RegExpUtility.get_safe_reg_exp
     # (pattern definition, tag) pairs registered for every mode.
     tagged_patterns = (
         (ChineseNumeric.NumbersSpecialsChars, 'IntegerNum'),
         (ChineseNumeric.NumbersSpecialsCharsWithSuffix, 'IntegerNum'),
         (ChineseNumeric.DottedNumbersSpecialsChar, 'IntegerNum'),
         (ChineseNumeric.NumbersWithHalfDozen, 'IntegerChs'),
         (ChineseNumeric.NumbersWithDozen, 'IntegerChs'),
     )
     self.__regexes = [
         ReVal(re=to_regex(definition), val=tag)
         for definition, tag in tagged_patterns
     ]

     # One extra, mode-dependent pattern goes at the end of the table.
     if mode == ChineseNumberExtractorMode.DEFAULT:
         extra_entry = ReVal(
             re=to_regex(ChineseNumeric.NumbersWithAllowListRegex),
             val='IntegerChs')
         self.__regexes.append(extra_entry)
     elif mode == ChineseNumberExtractorMode.EXTRACT_ALL:
         extra_entry = ReVal(
             re=to_regex(ChineseNumeric.NumbersAggressiveRegex),
             val='IntegerChs')
         self.__regexes.append(extra_entry)
 def __init__(self, config):
     """Create the Portuguese duration helpers, regexes and unit tables.

     Args:
         config: Accepted but not read anywhere in this initializer.
     """
     build = RegExpUtility.get_safe_reg_exp

     # Helper extractors and parser.
     duration_config = PortugueseDurationExtractorConfiguration()
     self._duration_extractor = BaseDurationExtractor(duration_config, False)
     self._cardinal_extractor: BaseNumberExtractor = (
         PortugueseCardinalExtractor())
     number_config = PortugueseNumberParserConfiguration()
     self._number_parser: BaseNumberParser = BaseNumberParser(number_config)

     # Regexes built from the PortugueseDateTime definitions.
     self._followed_unit: Pattern = build(PortugueseDateTime.FollowedUnit)
     self._suffix_and_regex: Pattern = build(
         PortugueseDateTime.SuffixAndRegex)
     self._number_combined_with_unit: Pattern = build(
         PortugueseDateTime.DurationNumberCombinedWithUnit)
     self._an_unit_regex: Pattern = build(PortugueseDateTime.AnUnitRegex)
     self._all_date_unit_regex: Pattern = build(PortugueseDateTime.AllRegex)
     self._half_date_unit_regex: Pattern = build(
         PortugueseDateTime.HalfRegex)
     self._inexact_number_unit_regex: Pattern = build(
         PortugueseDateTime.InexactNumberUnitRegex)

     # Lookup tables.
     self._unit_map: Dict[str, str] = PortugueseDateTime.UnitMap
     self._unit_value_map: Dict[str, int] = PortugueseDateTime.UnitValueMap
     self._double_numbers: Dict[str, float] = (
         PortugueseDateTime.DoubleNumbers)
Beispiel #30
0
 def __init__(self, config):
     """Prepare the before/after/since regexes and collect the sub-parsers.

     Args:
         config: Supplies the date, time, date-time, period and duration
             parsers, and is handed to the holiday and set parser
             configurations.
     """
     build = RegExpUtility.get_safe_reg_exp

     # Regexes built from the EnglishDateTime definitions.
     self.__before_regex = build(EnglishDateTime.BeforeRegex)
     self.__after_regex = build(EnglishDateTime.AfterRegex)
     self.__since_regex = build(EnglishDateTime.SinceRegex)

     holiday_config = EnglishHolidayParserConfiguration(config)
     self.__holiday_parser = BaseHolidayParser(holiday_config)

     # Parsers copied directly from the supplied configuration.
     self.__date_parser = config.date_parser
     self.__time_parser = config.time_parser
     self.__date_time_parser = config.date_time_parser
     self.__date_period_parser = config.date_period_parser
     self.__time_period_parser = config.time_period_parser
     self.__date_time_period_parser = config.date_time_period_parser
     self.__duration_parser = config.duration_parser

     set_config = EnglishSetParserConfiguration(config)
     self.__set_parser = BaseSetParser(set_config)