Exemple #1
0
    def parse_each_unit(self, source: str) -> DateTimeResolutionResult:
        """Resolve a recurring expression into a 'Set: <timex>' result.

        Two forms are tried in order: the periodic pattern
        (``config.periodic_regex``) and the "each <unit>" pattern
        (``config.each_unit_regex``), the latter only when it covers the
        whole of ``source``. If both succeed, the values written by the
        second one overwrite those of the first.

        :param source: text to resolve.
        :return: a result whose ``success`` is True when a timex was found.
        """
        outcome = DateTimeResolutionResult()

        # Form 1: periodic words such as "daily", "weekly".
        if regex.match(self.config.periodic_regex, source):
            daily = self.config.get_matched_daily_timex(source)
            if not daily.matched:
                return outcome

            outcome.timex = daily.timex
            outcome.future_value = outcome.past_value = 'Set: ' + outcome.timex
            outcome.success = True

        # Form 2: "each month" style; the match must span the full source.
        each_match = regex.match(self.config.each_unit_regex, source)
        if not each_match or len(each_match.group()) != len(source):
            return outcome

        unit = RegExpUtility.get_group(each_match, 'unit')
        if not unit or unit not in self.config.unit_map:
            return outcome

        unit_timex = self.config.get_matched_unit_timex(unit)
        if not unit_timex.matched:
            return outcome

        # When the 'other' group is present, every '1' in the timex becomes
        # '2' (presumably "every other <unit>" -- confirm against the regex).
        if RegExpUtility.get_group(each_match, 'other'):
            unit_timex = MatchedTimex(
                matched=unit_timex.matched,
                timex=unit_timex.timex.replace('1', '2'))

        outcome.timex = unit_timex.timex
        outcome.future_value = outcome.past_value = 'Set: ' + outcome.timex
        outcome.success = True
        return outcome
Exemple #2
0
    def adjust_by_prefix(self, prefix: str, adjust: AdjustParams):
        """Apply the minute offset expressed by ``prefix`` to ``adjust``.

        The offset is 30 for a prefix ending in 'demie', 15 for 'un quart' /
        'quart', 45 for 'trois quarts'; otherwise it is read from the
        ``deltamin`` (digits) or ``deltaminnum`` (word looked up in
        ``self.numbers``) group of ``self.less_than_one_hour``. A prefix
        ending in 'à' negates the offset.

        :param prefix: text preceding the hour.
        :param adjust: mutated in place (``minute``, ``hour``, ``has_minute``).
        """
        text = prefix.strip().lower()
        offset = 0

        if text.endswith('demie'):
            offset = 30
        elif text.endswith(('un quart', 'quart')):
            offset = 15
        elif text.endswith('trois quarts'):
            offset = 45
        else:
            found = regex.search(self.less_than_one_hour, text)
            if found:
                digits = RegExpUtility.get_group(found, 'deltamin')
                if digits:
                    offset = int(digits)
                else:
                    word = RegExpUtility.get_group(found, 'deltaminnum').lower()
                    offset = self.numbers.get(word)

        if text.endswith('à'):
            offset = offset * -1

        adjust.minute += offset

        # A negative minute borrows one hour.
        if adjust.minute < 0:
            adjust.minute += 60
            adjust.hour -= 1

        adjust.has_minute = True
    def validate_match(self, match: Match, text: str):
        """Tell whether a date match is unambiguous within ``text``.

        A match can be ambiguous only when it ends with its "year" group,
        because that year might actually open the following date entity.

        Offsets are taken from ``match`` itself (``match.start``/``match.end``)
        rather than from ``str.index``, which returns the FIRST occurrence of
        a substring and therefore gives wrong positions when the year digits
        or the matched text also appear earlier in ``text``.

        :param match: a match obtained by searching ``text``.
        :param text: the text that ``match`` was found in.
        :return: truthy when the match should be kept.
        """
        year_group = RegExpUtility.get_group(match, Constants.YEAR_GROUP_NAME)

        # If the match doesn't contain a "year" part, it cannot be ambiguous.
        if not year_group:
            return True

        year_start = match.start(Constants.YEAR_GROUP_NAME)
        year_end = match.end(Constants.YEAR_GROUP_NAME)

        # If the "year" part is not at the end of the match, it's a valid match.
        if year_end != match.end():
            return True

        # If the following text (including the "year" part) doesn't start with
        # a Date entity, it's a valid match.
        if not self.starts_with_basic_date(text[year_start:]):
            return True

        # If the following text (including the "year" part) starts with a Date
        # entity, but the following text (excluding the "year" part) also
        # starts with a valid Date entity, the current match is still valid.
        # For example, "10-1-2018-10-2-2018": match "10-1-2018" is valid
        # because, although "2018-10-2" is a valid match (so the first "2018"
        # might belong to the second Date entity), "10-2-2018" is also a valid
        # Date entity.
        remainder = text[year_end:].strip()
        remainder = self.trim_start_range_connector_symbols(remainder)
        return self.starts_with_basic_date(remainder)
    def adjust_by_prefix(self, prefix: str, adjust: AdjustParams):
        """Apply the minute offset expressed by ``prefix`` to ``adjust``.

        The offset is 30 for a prefix ending in 'mezza' / 'mezzo', 15 for
        'un quarto' / 'quarto', 45 for 'tre quarti'; otherwise it is read
        from the ``deltamin`` (digits) or ``deltaminnum`` (word looked up in
        ``self.numbers``) group of ``self.less_than_one_hour``. A prefix
        starting with 'meno' negates the offset.

        :param prefix: text preceding the hour.
        :param adjust: mutated in place (``minute``, ``hour``, ``has_minute``).
        """
        text = prefix.strip().lower()
        offset = 0

        # @todo Move hardcoded strings to resource YAML file.
        if text.endswith(('mezza', 'mezzo')):
            offset = 30
        elif text.endswith(('un quarto', 'quarto')):
            offset = 15
        elif text.endswith('tre quarti'):
            offset = 45
        else:
            found = regex.search(self.less_than_one_hour, text)
            if found:
                digits = RegExpUtility.get_group(found, 'deltamin')
                if digits:
                    offset = int(digits)
                else:
                    word = RegExpUtility.get_group(found, 'deltaminnum').lower()
                    offset = self.numbers.get(word)

        if text.startswith('meno'):
            offset = offset * -1

        adjust.minute += offset

        # A negative minute borrows one hour.
        if adjust.minute < 0:
            adjust.minute += 60
            adjust.hour -= 1

        adjust.has_minute = True
Exemple #5
0
 def adjust_by_prefix(self, prefix: str, adjust: AdjustParams):
     """Apply the minute offset expressed by ``prefix`` to ``adjust``.

     Fixed phrases map to 30 ('meia', 'e meia'), 15 ('quarto',
     'e um quarto', 'quinze', 'e quinze') or -15 ('menos um quarto').
     Anything else is read from the ``deltamin`` (digits) or
     ``deltaminnum`` (word looked up in ``self.numbers``) group of
     ``self.less_than_one_hour``. When the pattern does not match at all the
     offset stays 0, as in the sibling ``adjust_by_prefix`` variants, instead
     of dereferencing a missing group.

     A prefix ending in 'para a', 'para as', 'pra', 'pras', 'antes da' or
     'antes das' negates the offset.

     :param prefix: text preceding the hour.
     :param adjust: mutated in place (``minute``, ``hour``, ``has_minute``).
     :raises KeyError: if the ``deltaminnum`` word is not in ``self.numbers``.
     """
     delta_min = 0
     prefix = prefix.strip().lower()
     if prefix.startswith(('meia', 'e meia')):
         delta_min = 30
     elif prefix.startswith(('quarto', 'e um quarto', 'quinze', 'e quinze')):
         delta_min = 15
     elif prefix.startswith('menos um quarto'):
         delta_min = -15
     else:
         match = regex.search(self.less_than_one_hour, prefix)
         # Guard against prefixes the pattern does not recognise.
         if match:
             min_str = RegExpUtility.get_group(match, 'deltamin')
             if min_str:
                 delta_min = int(min_str)
             else:
                 min_str = RegExpUtility.get_group(match, 'deltaminnum').lower()
                 delta_min = self.numbers[min_str]
     if prefix.endswith(('para a', 'para as', 'pra', 'pras', 'antes da', 'antes das')):
         delta_min = delta_min * -1
     adjust.minute += delta_min
     # A negative minute borrows one hour.
     if adjust.minute < 0:
         adjust.minute += 60
         adjust.hour -= 1
     adjust.has_minute = True
Exemple #6
0
 def adjust_by_suffix(self, suffix: str, adjust: AdjustParams):
     """Shift ``adjust.hour`` according to an am/pm style suffix.

     The suffix is only interpreted when ``self.time_suffix_full`` matches it
     entirely and the match has no 'oclock' group. The final hour is always
     normalised modulo 24.

     :param suffix: text following the time.
     :param adjust: mutated in place (``hour``, ``has_am``, ``has_pm``).
     """
     text = suffix.strip().lower()
     shift = 0
     found = regex.search(self.time_suffix_full, text)
     covers_all = (found is not None and found.start() == 0
                   and found.group() == text)
     if covers_all and not RegExpUtility.get_group(found, 'oclock'):
         if RegExpUtility.get_group(found, 'am'):
             if adjust.hour < 12:
                 adjust.has_am = True
             else:
                 shift -= 12
         pm_text = RegExpUtility.get_group(found, 'pm')
         if pm_text:
             if adjust.hour < 12:
                 shift = 12
             if regex.search(self.night_regex, pm_text):
                 # Night-time wording with an hour of 12 or at most 3 is
                 # flagged as AM and gets no 12-hour shift.
                 if adjust.hour == 12:
                     adjust.hour = 0
                     shift = 0
                     adjust.has_am = True
                 elif adjust.hour <= 3:
                     shift = 0
                     adjust.has_am = True
                 else:
                     adjust.has_pm = True
     adjust.hour = (adjust.hour + shift) % 24
Exemple #7
0
    def __parse_month_with_year(self, source: str, reference: datetime) -> DateTimeResolutionResult:
        """Resolve a "<month> <year>" expression into a one-month period.

        The trimmed, lower-cased source must be matched entirely by
        ``config.month_with_year`` or, failing that, by
        ``config.month_num_with_year``.

        When the 'year' group is not an integer, the year is derived from
        ``config.get_swift_year`` applied to the 'order' group, relative to
        ``reference``. A swift below -1 is treated as "no usable year" and
        the parse fails; this is the same threshold ``__parse_quarter`` uses,
        so offsets of -1 and 0 are accepted rather than rejected.

        :param source: text to resolve.
        :param reference: date the relative year is computed from.
        :return: a result whose ``success`` is True when the period resolved.
        """
        trimmed_source = source.strip().lower()
        result = DateTimeResolutionResult()
        match = self.config.month_with_year.search(trimmed_source)

        if not match:
            match = self.config.month_num_with_year.search(trimmed_source)

        # Only a match spanning the whole trimmed source is accepted.
        if not (match and match.end() - match.start() == len(trimmed_source)):
            return result

        month_str = RegExpUtility.get_group(match, 'month')
        year_str = RegExpUtility.get_group(match, 'year')
        order_str = RegExpUtility.get_group(match, 'order')
        month = self.config.month_of_year.get(month_str)
        try:
            year = int(year_str)
        except (ValueError, TypeError):
            swift = self.config.get_swift_year(order_str)
            if swift < -1:
                return result
            year = reference.year + swift
        begin_date = DateUtils.safe_create_from_value(DateUtils.min_value, year, month, 1)
        # With an inclusive end, the period stops on the last day of the month
        # instead of the first day of the next one.
        add_days = -1 if self._inclusive_end_period else 0
        end_date = begin_date + datedelta(months=1) + datedelta(days=add_days)
        result.future_value = [begin_date, end_date]
        result.past_value = [begin_date, end_date]
        result.timex = f'{year:04d}-{month:02d}'
        result.success = True

        return result
Exemple #8
0
    def match_each(self, extractor: DateTimeExtractor, source: str,
                   reference: datetime) -> List[Token]:
        """Yield tokens for set expressions built around another entity.

        Two passes are made. In the first, each ``config.set_each_regex``
        match is removed from ``source`` and the remainder is run through
        ``extractor``; an extraction overlapping the removal point is widened
        by the removed length. In the second, each
        ``config.set_week_day_regex`` match is replaced by its 'weekday'
        group and extractions that start at or before the match and contain
        that weekday are widened by one character plus the 'prefix' group.

        This is a generator: tokens are produced lazily.
        """
        for each_match in regex.finditer(self.config.set_each_regex, source):
            cut_at = each_match.start()
            remainder = source[:cut_at] + source[each_match.end():]

            for er in extractor.extract(remainder, reference):
                er_end = er.start + er.length
                if er.start <= cut_at < er_end:
                    yield Token(er.start, er_end + len(each_match.group()))

        for day_match in regex.finditer(self.config.set_week_day_regex, source):
            weekday = RegExpUtility.get_group(day_match, 'weekday')
            remainder = (source[:day_match.start()] + weekday
                         + source[day_match.end():])

            for er in extractor.extract(remainder, reference):
                if er.start > day_match.start() or weekday not in er.text:
                    continue

                span = er.length + 1
                lead = RegExpUtility.get_group(day_match, 'prefix')
                if lead:
                    span += len(lead)

                yield Token(er.start, er.start + span)
    def parse_time_of_day(self, source: str,
                          reference: datetime) -> DateTimeResolutionResult:
        """Resolve a time-of-day phrase into a start/end range on ``reference``'s date.

        An early/late modifier found through ``config.time_of_day_regex`` is
        stripped from the text, recorded in ``comment``/``mod`` and used to
        narrow the range returned by ``config.get_matched_timex_range``:
        "early" keeps the first two hours, "late" starts two hours in.

        :param source: text to resolve.
        :param reference: supplies the year, month and day of the range.
        :return: a result whose ``success`` is True when a range was matched.
        """
        result = DateTimeResolutionResult()
        year, month, day = reference.year, reference.month, reference.day

        # Detect and strip the early/late modifier.
        is_early = is_late = False
        found = regex.search(self.config.time_of_day_regex, source)
        if found is not None:
            early_text = RegExpUtility.get_group(found, Constants.COMMENT_EARLY)
            if early_text:
                is_early = True
                source = source.replace(early_text, '')
                result.comment = Constants.COMMENT_EARLY
                result.mod = TimeTypeConstants.EARLY_MOD
            late_text = RegExpUtility.get_group(found, Constants.COMMENT_LATE)
            if late_text:
                is_late = True
                source = source.replace(late_text, '')
                result.comment = Constants.COMMENT_LATE
                result.mod = TimeTypeConstants.LATE_MOD

        window = self.config.get_matched_timex_range(source)
        if not window.matched:
            return result

        # Narrow the window when a modifier was present ("early" wins if both).
        if is_early:
            early_end_min = 0 if window.end_min == 59 else window.end_min
            window = MatchedTimeRegex(matched=window.matched,
                                      timex=window.timex,
                                      begin_hour=window.begin_hour,
                                      end_hour=window.begin_hour + 2,
                                      end_min=early_end_min)
        elif is_late:
            window = MatchedTimeRegex(matched=window.matched,
                                      timex=window.timex,
                                      begin_hour=window.begin_hour + 2,
                                      end_hour=window.end_hour,
                                      end_min=window.end_min)

        range_start = datetime(year, month, day, window.begin_hour, 0, 0)
        # end_min is used for both the minute and the second of the end time.
        range_end = datetime(year, month, day, window.end_hour,
                             window.end_min, window.end_min)

        result.timex = window.timex
        result.future_value = ResolutionStartEnd()
        result.past_value = ResolutionStartEnd()
        result.future_value.start = range_start
        result.future_value.end = range_end
        result.past_value.start = result.future_value.start
        result.past_value.end = result.future_value.end

        result.success = True
        return result
    def parse_weekday_of_month(self, source: str,
                               reference: datetime) -> DateTimeParseResult:
        """Resolve "<cardinal> <weekday> of <month>" into concrete dates.

        The month comes from the 'month' group when present; otherwise it is
        ``reference``'s month shifted by ``config.get_swift_month``. A "last"
        cardinal is tried as the 5th occurrence and stepped back one week if
        that lands outside the month.

        Fixes over the previous implementation:
        * stepping back a week uses ``timedelta`` instead of
          ``replace(day=day - 7)``, which raised ``ValueError`` whenever the
          overflowed date was within the first 7 days of the next month;
        * the past-date branch no longer subtracts from the ``date`` method;
        * the month shift wraps across year boundaries instead of passing an
          out-of-range month to ``datetime.replace``.

        :param source: text to resolve.
        :param reference: date that relative months/years are computed from.
        :return: a result whose ``success`` is True when the pattern matched.
        """
        from datetime import timedelta

        trimmed_source = source.strip()
        result = DateTimeResolutionResult()
        match = regex.match(self.config.week_day_of_month_regex,
                            trimmed_source)

        if not match:
            return result

        cardinal_str = RegExpUtility.get_group(match, 'cardinal')
        weekday_str = RegExpUtility.get_group(match, 'weekday')
        month_str = RegExpUtility.get_group(match, 'month')
        no_year = False
        cardinal = 5 if self.config.is_cardinal_last(
            cardinal_str) else self.config.cardinal_map.get(cardinal_str)
        weekday = self.config.day_of_week.get(weekday_str)
        year = reference.year

        if not month_str:
            swift = self.config.get_swift_month(trimmed_source)
            # Month arithmetic on a zero-based month index so that e.g.
            # December + 1 becomes January of the following year.
            year, month_index = divmod(
                reference.year * 12 + (reference.month - 1) + swift, 12)
            month = month_index + 1
        else:
            month = self.config.month_of_year.get(month_str)
            no_year = True

        one_week = timedelta(days=7)
        value = self._compute_date(cardinal, weekday, month, year)

        # The requested occurrence does not exist in this month: use the
        # previous one.
        if value.month != month:
            cardinal -= 1
            value = value - one_week

        future_date = value
        past_date = value

        if no_year and future_date < reference:
            future_date = self._compute_date(cardinal, weekday, month,
                                             year + 1)
            if future_date.month != month:
                future_date = future_date - one_week

        if no_year and past_date >= reference:
            past_date = self._compute_date(cardinal, weekday, month, year - 1)
            if past_date.month != month:
                past_date = past_date - one_week

        result.timex = '-'.join([
            'XXXX',
            FormatUtil.to_str(month, 2), 'WXX',
            str(weekday), '#' + str(cardinal)
        ])
        result.future_value = future_date
        result.past_value = past_date
        result.success = True
        return result
Exemple #11
0
    def __parse_quarter(self, source: str, reference: datetime) -> DateTimeResolutionResult:
        """Resolve a quarter expression into a three-month period.

        ``config.quarter_regex`` is tried first, then
        ``config.quarter_regex_year_front``; the match must span the whole
        source. The year comes from the 'year' group, or from
        ``config.get_swift_year`` on the 'order' group relative to
        ``reference``. When the swift is below -1 the current year is used
        and past/future values are spread over adjacent years depending on
        where the quarter's end falls relative to ``reference``.

        :param source: text to resolve.
        :param reference: date the relative year is computed from.
        :return: a result whose ``success`` is True when the quarter resolved.
        """
        result = DateTimeResolutionResult()

        def spans_source(candidate):
            return candidate and len(candidate.group()) == len(source)

        match = self.config.quarter_regex.search(source)
        if not spans_source(match):
            match = self.config.quarter_regex_year_front.search(source)
        if not spans_source(match):
            return result

        cardinal_str = RegExpUtility.get_group(match, 'cardinal')
        year_str = RegExpUtility.get_group(match, 'year')
        order_str = RegExpUtility.get_group(match, 'order')
        quarter_str = RegExpUtility.get_group(match, 'number')

        no_specific_value = False
        try:
            year = int(year_str)
        except (ValueError, TypeError):
            if order_str is None:
                order_str = ''
            swift = self.config.get_swift_year(order_str)
            if swift < -1:
                swift = 0
                no_specific_value = True
            year = reference.year + swift

        quarter_num = int(quarter_str) if quarter_str else self.config.cardinal_map[cardinal_str]

        def quarter_bounds(target_year):
            # First day of the quarter and first day of the following quarter.
            first_month = ((quarter_num - 1) * Constants.TrimesterMonthCount) + 1
            following_month = (quarter_num * Constants.TrimesterMonthCount) + 1
            return [
                DateUtils.safe_create_date_resolve_overflow(target_year, first_month, 1),
                DateUtils.safe_create_date_resolve_overflow(target_year, following_month, 1),
            ]

        begin_date, end_date = quarter_bounds(year)
        future_span = [begin_date, end_date]
        past_span = [begin_date, end_date]

        if no_specific_value:
            if end_date < reference:
                # Quarter already over: the future reading is next year's.
                future_span = quarter_bounds(year + 1)
            elif end_date > reference:
                # Quarter not over yet: the past reading is last year's.
                past_span = quarter_bounds(year - 1)

        result.future_value = future_span
        result.past_value = past_span

        result.timex = f'({FormatUtil.luis_date_from_datetime(begin_date)},{FormatUtil.luis_date_from_datetime(end_date)},P3M)'
        result.success = True
        return result
    def match_simple_cases(self, source: str) -> List[Token]:
        """Collect tokens for simple-case matches that carry a pm, am or desc group.

        :param source: text to scan with every pattern in
            ``config.simple_cases_regex``.
        :return: one token per qualifying match, in pattern then match order.
        """
        tokens: List[Token] = []

        for pattern in self.config.simple_cases_regex:
            for found in regex.finditer(pattern, source):
                markers = tuple(
                    RegExpUtility.get_group(found, name)
                    for name in ('pm', 'am', 'desc'))
                if any(markers):
                    tokens.append(Token(found.start(), found.end()))

        return tokens
Exemple #13
0
    def _is_valid_match(self, match: Match) -> bool:
        """Check whether a URL match should be kept.

        A match is kept when it has an 'IPurl' group, or when its 'Tld' group
        is matched in full by ``self.tld_matcher()``. Matches that look like
        ``self.ambiguous_time_term`` are always rejected.

        :return: truthy when the match is accepted; note the 'IPurl' group
            text itself is returned when it is what made the match valid.
        """
        ip_url = RegExpUtility.get_group(match, 'IPurl')
        tld_ok = False

        if not ip_url:
            tld = RegExpUtility.get_group(match, 'Tld')
            hits = self.tld_matcher().find(tld)
            tld_ok = any(
                hit.start == 0 and hit.end == len(tld) for hit in hits)

        # For cases like "7.am" or "8.pm" which are more likely time terms.
        looks_like_time = re.match(self.ambiguous_time_term.re, match.group(0))
        if looks_like_time is not None:
            return False
        return tld_ok or ip_url
    def match_to_date(self, match, reference: datetime):
        """Convert a date match into a resolution with future and past values.

        Month and day are looked up in ``config.month_of_year`` /
        ``config.day_of_month``; when either is unknown they stay 0. A
        two-digit year is expanded to 19xx or 20xx using the
        ``Constants`` thresholds. Without a year, ``reference``'s year is
        used and the future/past values are moved to the next/previous year
        when they fall on the wrong side of ``reference``.

        :param match: regex match exposing year, month and day groups.
        :param reference: date used to anchor year-less dates.
        :return: a result with ``success`` set to True.
        """
        from .utilities import DateTimeResolutionResult
        from .utilities import DateUtils
        from .utilities import DateTimeFormatUtil
        result = DateTimeResolutionResult()
        year_str = RegExpUtility.get_group(match, Constants.YEAR_GROUP_NAME)
        month_str = RegExpUtility.get_group(match, Constants.MONTH_GROUP_NAME)
        day_str = RegExpUtility.get_group(match, Constants.DAY_GROUP_NAME)
        month = day = year = 0

        month_table = self.config.month_of_year
        day_table = self.config.day_of_month
        if month_str in month_table and day_str in day_table:
            month = month_table.get(month_str)
            day = day_table.get(day_str)

            if year_str:
                year = int(year_str) if year_str.isnumeric() else 0

                # Expand two-digit years.
                if Constants.MIN_TWO_DIGIT_YEAR_PAST_NUM <= year < 100:
                    year += 1900
                elif 0 <= year < Constants.MAX_TWO_DIGIT_YEAR_FUTURE_NUM:
                    year += 2000

        no_year = year == 0
        if no_year:
            year = reference.year
            result.timex = DateTimeFormatUtil.luis_date(-1, month, day)
        else:
            result.timex = DateTimeFormatUtil.luis_date(year, month, day)

        future_date = DateUtils.safe_create_from_min_value(year, month, day)
        past_date = DateUtils.safe_create_from_min_value(year, month, day)

        if no_year:
            if future_date < reference:
                future_date = DateUtils.safe_create_from_min_value(
                    year + 1, month, day)
            if past_date >= reference:
                past_date = DateUtils.safe_create_from_min_value(
                    year - 1, month, day)

        result.future_value = future_date
        result.past_value = past_date
        result.success = True
        return result
Exemple #15
0
    def parse_each(self, extractor: DateTimeExtractor, parser: DateTimeParser,
                   source: str,
                   reference: datetime) -> DateTimeResolutionResult:
        """Resolve a set expression wrapped around another date/time entity.

        Two rewrites of ``source`` are tried: removing the
        ``config.set_each_regex`` match, and replacing the
        ``config.set_week_day_regex`` match by its 'weekday' group. A rewrite
        qualifies when ``extractor`` returns exactly one result covering the
        whole rewritten text. If both qualify, the second one is used.

        The qualifying extraction is remembered explicitly, so a failed
        second attempt can no longer overwrite the result of a successful
        first one (which previously caused an ``IndexError`` or a parse of
        the wrong extraction).

        :return: a result whose ``success`` is True when a rewrite qualified.
        """
        result = DateTimeResolutionResult()
        chosen = None  # extraction that backs the set expression, if any

        match = regex.search(self.config.set_each_regex, source)
        if match:
            trimmed_text = source[0:match.start()] + source[match.end():]
            er = extractor.extract(trimmed_text, reference)
            if len(er) == 1 and er[0].length == len(trimmed_text):
                chosen = er[0]

        match = regex.search(self.config.set_week_day_regex, source)
        if match:
            trimmed_text = source[0:match.start()] + RegExpUtility.get_group(
                match, 'weekday') + source[match.end():]
            er = extractor.extract(trimmed_text, reference)
            if len(er) == 1 and er[0].length == len(trimmed_text):
                chosen = er[0]

        if chosen is not None:
            pr = parser.parse(chosen)
            result.timex = pr.timex_str
            result.future_value = 'Set: ' + pr.timex_str
            result.past_value = 'Set: ' + pr.timex_str
            result.success = True

        return result
    def match_to_date(self, match,
                      reference: datetime) -> DateTimeResolutionResult:
        """Convert a date match into a resolution with future and past values.

        Month and day are looked up in ``config.month_of_year`` /
        ``config.day_of_month``; when either is unknown they stay 0. Years
        90-99 become 19xx and years below 20 become 20xx. Without a year,
        ``reference``'s year is used and the future/past values are moved to
        the next/previous year when they fall on the wrong side of
        ``reference``.

        :param match: regex match exposing 'year', 'month' and 'day' groups.
        :param reference: date used to anchor year-less dates.
        :return: a result with ``success`` set to True.
        """
        result = DateTimeResolutionResult()
        year_str = RegExpUtility.get_group(match, 'year')
        month_str = RegExpUtility.get_group(match, 'month')
        day_str = RegExpUtility.get_group(match, 'day')
        month = day = year = 0

        month_table = self.config.month_of_year
        day_table = self.config.day_of_month
        if month_str in month_table and day_str in day_table:
            month = month_table.get(month_str)
            day = day_table.get(day_str)

            if year_str:
                year = int(year_str) if year_str.isnumeric() else 0

                # Expand two-digit years.
                if 90 <= year < 100:
                    year += 1900
                elif year < 20:
                    year += 2000

        no_year = year == 0
        if no_year:
            year = reference.year
            result.timex = FormatUtil.luis_date(-1, month, day)
        else:
            result.timex = FormatUtil.luis_date(year, month, day)

        future_date = DateUtils.safe_create_from_min_value(year, month, day)
        past_date = DateUtils.safe_create_from_min_value(year, month, day)

        if no_year:
            if future_date < reference:
                future_date = DateUtils.safe_create_from_min_value(
                    year + 1, month, day)
            if past_date >= reference:
                past_date = DateUtils.safe_create_from_min_value(
                    year - 1, month, day)

        result.future_value = future_date
        result.past_value = past_date
        result.success = True
        return result
Exemple #17
0
    def get_year_from_text(self, match: Match) -> int:
        """Extract a four-digit year from a match.

        A numeric 'year' group is used directly, with two-digit values
        expanded to 19xx/20xx using the ``Constants`` thresholds. Otherwise
        the year is assembled from the ``FIRST_TWO_YEAR_NUM`` and
        ``LAST_TWO_YEAR_NUM`` groups, each converted through
        ``config.number_parser``.

        Compared with the previous implementation: missing groups are checked
        before any string method is called on them, each part is parsed once
        instead of twice, and the part's offset comes from the match itself
        rather than from the first occurrence of its text in the input.

        :return: the year, or ``Constants.INVALID_YEAR`` when none can be
            derived.
        """
        year = Constants.INVALID_YEAR

        year_str = RegExpUtility.get_group(match, 'year')
        if year_str and not year_str.isspace():
            year = int(year_str)
            if 100 > year >= Constants.MIN_TWO_DIGIT_YEAR_PAST_NUM:
                year += 1900
            elif 0 <= year < Constants.MAX_TWO_DIGIT_YEAR_FUTURE_NUM:
                year += 2000
            return year

        first_two_year_num_str = RegExpUtility.get_group(
            match, Constants.FIRST_TWO_YEAR_NUM)
        if not first_two_year_num_str or first_two_year_num_str.isspace():
            return year

        def parse_part(group_name, group_text):
            # Wrap the group in an ExtractResult and return its parsed value,
            # or 0 when the parser yields nothing usable.
            er = ExtractResult()
            er.text = group_text
            er.start = match.start(group_name)
            er.length = len(group_text)
            return self.config.number_parser.parse(er).value or 0

        first_two_year_num = parse_part(Constants.FIRST_TWO_YEAR_NUM,
                                        first_two_year_num_str)

        last_two_year_num = 0
        last_two_year_num_str = RegExpUtility.get_group(
            match, Constants.LAST_TWO_YEAR_NUM)
        if last_two_year_num_str and not last_two_year_num_str.isspace():
            last_two_year_num = parse_part(Constants.LAST_TWO_YEAR_NUM,
                                           last_two_year_num_str)

        # A leading part below 100 is rejected when there is no trailing
        # part, or when it is a multiple of ten followed by a single word.
        # The trailing text is only inspected when its value is non-zero,
        # i.e. when it is known to be a non-empty string.
        if first_two_year_num < 100 and (
                last_two_year_num == 0
                or (first_two_year_num % 10 == 0
                    and len(last_two_year_num_str.strip().split(' ')) == 1)):
            return Constants.INVALID_YEAR

        if first_two_year_num >= 100:
            return first_two_year_num + last_two_year_num
        return (first_two_year_num * 100) + last_two_year_num
def parse_chinese_dynasty_year(year_str: str, dynasty_year_regex: Pattern,
                               dynasty_start_year: str, dynasty_year_map: dict,
                               integer_extractor, number_parser):
    """Convert a dynasty-era year expression into a plain year number.

    ``year_str`` is accepted only when ``dynasty_year_regex`` matches it from
    the first to the last character. The regex is read through its named
    groups ``dynasty`` and ``biasYear``.

    :param year_str: Text to interpret.
    :param dynasty_year_regex: Pattern exposing ``dynasty`` / ``biasYear``.
    :param dynasty_start_year: Literal that denotes the first year of an era.
    :param dynasty_year_map: Maps the ``dynasty`` group text to the year that
        corresponds to the era's first year.
    :param integer_extractor: Object whose ``extract(text)`` returns the
        numeric extraction(s) for the ``biasYear`` text.
    :param number_parser: Object whose ``parse(er).value`` gives the number.
    :return: The resolved year as ``int``; ``None`` when ``year_str`` is not a
        full dynasty-year expression or its year offset cannot be extracted.
    :raises KeyError: If the matched dynasty is missing from
        ``dynasty_year_map``.
    """
    dynasty_year_match = regex.search(dynasty_year_regex, year_str)
    if not dynasty_year_match or dynasty_year_match.start() != 0 \
            or len(dynasty_year_match.group()) != len(year_str):
        return None

    # handle "康熙元年" refer to https://zh.wikipedia.org/wiki/%E5%B9%B4%E5%8F%B7
    dynasty_str = RegExpUtility.get_group(dynasty_year_match, "dynasty")
    bias_year_str = RegExpUtility.get_group(dynasty_year_match, "biasYear")
    basic_year = dynasty_year_map[dynasty_str]

    if bias_year_str == dynasty_start_year:
        bias_year = 1
    else:
        er = next(iter(integer_extractor.extract(bias_year_str)), None)
        if er is None:
            # Nothing numeric was extracted from the offset text; report
            # "no year" instead of handing None to the number parser.
            return None
        bias_year = int(number_parser.parse(er).value)

    # The first year of an era equals the era's base year, hence the "- 1".
    return int(basic_year + bias_year - 1)
Exemple #19
0
    def adjust_by_prefix(self, prefix: str, adjust: AdjustParams):
        """Apply the minute offset expressed by a time prefix to ``adjust``.

        The prefix is stripped and lower-cased, then mapped to a number of
        minutes: fixed phrases first, otherwise the ``deltamin`` /
        ``deltaminnum`` groups of ``self.less_than_one_hour``. Prefixes ending
        in a "to/before" phrase negate the offset. ``adjust.minute`` and, on
        underflow, ``adjust.hour`` are updated in place and
        ``adjust.has_minute`` is set.

        :param prefix: Text preceding the hour.
        :param adjust: Mutable holder with ``minute``, ``hour``, ``has_minute``.
        """
        text = prefix.strip().lower()

        if text.startswith(('cuarto', 'y cuarto')):
            offset = 15
        elif text.startswith('menos cuarto'):
            offset = -15
        elif text.startswith(('media', 'y media')):
            offset = 30
        elif text.startswith('three quarter'):
            offset = 45
        else:
            offset = 0
            found = regex.search(self.less_than_one_hour, text)
            if found:
                digits = RegExpUtility.get_group(found, 'deltamin')
                if digits:
                    offset = int(digits)
                else:
                    word = RegExpUtility.get_group(found, 'deltaminnum').lower()
                    offset = self.numbers.get(word)

        # "past" endings keep the offset positive; "to/before" endings flip it.
        past_endings = (
            'pasadas', 'pasados',
            'pasadas las', 'pasados las',
            'pasadas de las', 'pasados de las',
        )
        before_endings = (
            'para la', 'para las',
            'antes de la', 'antes de las',
        )
        if not text.endswith(past_endings) and text.endswith(before_endings):
            offset = -offset

        minute = adjust.minute + offset
        if minute < 0:
            # Borrow one hour when the offset goes below zero minutes.
            minute += 60
            adjust.hour -= 1
        adjust.minute = minute

        adjust.has_minute = True
Exemple #20
0
    def match_simple_cases(self, source: str) -> List[Token]:
        """Collect the spans of ``source`` matched by the simple-case regexes.

        Every pattern in ``self.config.simple_cases_regex`` is scanned over
        ``source``. A match that carries a minute or second group is kept when
        nothing follows it, when the text after it matches
        ``self.config.general_ending_regex`` or starts with
        ``self.config.token_before_date``, or when the match has a right-hand
        am/pm group. A match without minute/second is kept only when it has a
        pm, am or description group, or when ``ENABLE_PREVIEW`` is set.

        Positions are taken from the match object itself, so a matched text
        that also occurs earlier in ``source`` still yields the correct span.

        :param source: Text to scan.
        :return: One ``Token(start, end)`` per accepted match, in scan order.
        """
        result = []

        for regexp in self.config.simple_cases_regex:
            for match in regex.finditer(regexp, source):
                start, end = match.start(), match.end()

                # Cases like "from 10:30 to 11", don't necessarily need "am/pm"
                if RegExpUtility.get_group(match, Constants.MINUTE_GROUP_NAME) or \
                        RegExpUtility.get_group(match, Constants.SECOND_GROUP_NAME):

                    # Cases like "from 3:30 to 4" should be supported
                    # Cases like "from 3:30 to 5 on 1/1/2015" should be supported
                    # Cases like "from 3:30 to 4 people" is considered not valid
                    if end == len(source):
                        # No extra tokens after the time period
                        end_with_valid_token = True
                    else:
                        after_str = source[end:]
                        end_with_general_endings = self.config.general_ending_regex.match(after_str)
                        end_with_am_pm = RegExpUtility.get_group(match, Constants.RIGHT_AM_PM_GROUP_NAME)

                        # Time-zone endings are not handled here (not migrated yet).
                        end_with_valid_token = bool(
                            end_with_general_endings or end_with_am_pm
                            or after_str.lstrip().startswith(self.config.token_before_date))

                    if end_with_valid_token:
                        result.append(Token(start, end))
                else:
                    # Is there "pm" or "am"?
                    match_pm_str = RegExpUtility.get_group(match, Constants.PM_GROUP_NAME)
                    match_am_str = RegExpUtility.get_group(match, Constants.AM_GROUP_NAME)
                    desc_str = RegExpUtility.get_group(match, Constants.DESC_GROUP_NAME)

                    # Without any am/pm/description marker the match is only
                    # accepted in preview mode (time-zone check not migrated).
                    if match_pm_str or match_am_str or desc_str or \
                            (self.config.options & DateTimeOptions.ENABLE_PREVIEW) != 0:
                        result.append(Token(start, end))

        return result
Exemple #21
0
    def _parse_week_of_month(self, source: str, reference: datetime) -> DateTimeResolutionResult:
        """Resolve a "<cardinal> week of <month>" expression.

        ``source`` must be matched in full by
        ``self.config.week_of_month_regex``; otherwise an empty result is
        returned. The cardinal is 5 when the configuration reports it as the
        "last" one, else it is looked up in ``cardinal_map``. Without an
        explicit month the month/year come from ``reference`` shifted by the
        configured swift; with one, the year stays that of ``reference`` and
        the result is flagged as having no explicit year.

        :param source: Text to parse.
        :param reference: Date the expression is relative to.
        :return: Whatever ``self._get_week_of_month`` produces, or an empty
            ``DateTimeResolutionResult`` when ``source`` does not match.
        """
        empty = DateTimeResolutionResult()
        found = self.config.week_of_month_regex.search(source)

        if not found or len(found.group()) != len(source):
            return empty

        cardinal_text = RegExpUtility.get_group(found, 'cardinal')
        month_text = RegExpUtility.get_group(found, 'month')

        if self.config.is_last_cardinal(cardinal_text):
            cardinal = 5
        else:
            cardinal = self.config.cardinal_map.get(cardinal_text)

        if month_text:
            month = self.config.month_of_year.get(month_text)
            year = reference.year
            no_year = True
        else:
            swift = self.config.get_swift_day_or_month(source)
            shifted = reference + datedelta(months=swift)
            month, year = shifted.month, shifted.year
            no_year = False

        return self._get_week_of_month(cardinal, month, year, reference, no_year)
    def adjust_by_prefix(self, prefix: str, adjust: AdjustParams):
        """Apply the minute offset expressed by a time prefix to ``adjust``.

        The half / quarter / three-quarter token regexes are tried in order
        against the raw ``prefix``; if none matches, the ``deltamin`` /
        ``deltaminnum`` groups of ``self.less_than_one_hour`` are read from
        the stripped, lower-cased prefix. A normalized prefix starting with
        'zum' negates the offset. ``adjust.minute`` (and ``adjust.hour`` on
        underflow) are updated in place and ``adjust.has_minute`` is set.

        :param prefix: Text preceding the hour.
        :param adjust: Mutable holder with ``minute``, ``hour``, ``has_minute``.
        """
        normalized = prefix.strip().lower()

        # @todo Move hardcoded strings to resource YAML file.
        token_offsets = (
            (self._half_token_regex, -30),
            (self._quarter_to_token_regex, -15),
            (self._quarter_past_token_regex, 15),
            (self._three_quarter_to_token_regex, -45),
            (self._three_quarter_past_token_regex, 45),
        )

        offset = 0
        for pattern, minutes in token_offsets:
            # The token regexes see the prefix exactly as it was passed in.
            if regex.search(pattern, prefix):
                offset = minutes
                break
        else:
            found = regex.search(self.less_than_one_hour, normalized)
            if found:
                digits = RegExpUtility.get_group(found, 'deltamin')
                if digits:
                    offset = int(digits)
                else:
                    word = RegExpUtility.get_group(found, 'deltaminnum').lower()
                    offset = self.numbers.get(word)

        if normalized.startswith('zum'):
            offset = -offset

        adjust.minute += offset

        if adjust.minute < 0:
            # Borrow one hour when the offset goes below zero minutes.
            adjust.minute += 60
            adjust.hour -= 1

        adjust.has_minute = True
Exemple #23
0
    def adjust_by_suffix(self, suffix: str, adjust: AdjustParams):
        """Apply an am/pm suffix to the hour stored in ``adjust``.

        The stripped, lower-cased suffix must be matched in full by
        ``self.time_suffix``. When the match has no ``heures`` group, an
        ``am`` group moves hours >= 12 back by twelve and sets
        ``adjust.has_am``; a ``pm`` group moves hours < 12 forward by twelve
        and sets ``adjust.has_pm``. The hour is always reduced modulo 24.

        :param suffix: Text following the time.
        :param adjust: Mutable holder with ``hour``, ``has_am``, ``has_pm``.
        """
        text = suffix.strip().lower()
        shift = 0

        found = regex.match(self.time_suffix, text)
        if found and found.group() == text \
                and not RegExpUtility.get_group(found, 'heures'):
            if RegExpUtility.get_group(found, 'am'):
                if adjust.hour >= 12:
                    shift -= 12
                adjust.has_am = True

            if RegExpUtility.get_group(found, 'pm'):
                if adjust.hour < 12:
                    shift = 12
                adjust.has_pm = True

        # Applied even when nothing matched, which normalizes the hour.
        adjust.hour = (adjust.hour + shift) % 24
Exemple #24
0
 def adjust_by_prefix(self, prefix: str, adjust: AdjustParams):
     """Apply the minute offset expressed by a time prefix to ``adjust``.

     The prefix is stripped and lower-cased. 'half', '(a) quarter' and
     'three quarter' map to 30, 15 and 45 minutes; any other prefix is
     searched with ``self.less_than_one_hour`` and its ``deltamin`` (digits)
     or ``deltaminnum`` (number word, looked up in ``self.numbers``) group
     gives the offset. When that search finds nothing the offset stays 0
     instead of failing on a missing match. A prefix ending in 'to' negates
     the offset. ``adjust.minute`` (and ``adjust.hour`` on underflow) are
     updated in place and ``adjust.has_minute`` is set.

     :param prefix: Text preceding the hour.
     :param adjust: Mutable holder with ``minute``, ``hour``, ``has_minute``.
     :raises KeyError: If the ``deltaminnum`` word is not in ``self.numbers``.
     """
     delta_min = 0
     prefix = prefix.strip().lower()

     if prefix.startswith('half'):
         delta_min = 30
     elif prefix.startswith(('a quarter', 'quarter')):
         delta_min = 15
     elif prefix.startswith('three quarter'):
         delta_min = 45
     else:
         match = regex.search(self.less_than_one_hour, prefix)
         # Guard against prefixes the regex does not recognise at all.
         if match:
             min_str = RegExpUtility.get_group(match, 'deltamin')
             if min_str:
                 delta_min = int(min_str)
             else:
                 min_str = RegExpUtility.get_group(match, 'deltaminnum').lower()
                 delta_min = self.numbers[min_str]

     if prefix.endswith('to'):
         delta_min = -delta_min

     adjust.minute += delta_min

     if adjust.minute < 0:
         # Borrow one hour when the offset goes below zero minutes.
         adjust.minute += 60
         adjust.hour -= 1

     adjust.has_minute = True
Exemple #25
0
    def parse_ish(self, source: str, reference: datetime) -> DateTimeResolutionResult:
        """Resolve an approximate-hour expression to an hour on the reference day.

        The stripped, lower-cased text must be matched up to its last
        character by ``FrenchDateTime.IshRegex``. The hour comes from the
        ``hour`` group and defaults to 12 when that group is empty.

        :param source: Text to parse.
        :param reference: Date supplying year, month and day of the result.
        :return: Result with ``timex`` ``'T' + <hour>`` and identical
            future/past values on success; an empty result otherwise.
        """
        result = DateTimeResolutionResult()
        trimmed_source = source.strip().lower()

        # Match the same normalized text whose length is checked below, so
        # surrounding whitespace cannot make the comparison fail.
        match = RegExpUtility.get_safe_reg_exp(FrenchDateTime.IshRegex).match(trimmed_source)
        if match and match.end() == len(trimmed_source):
            hour_str = RegExpUtility.get_group(match, 'hour')
            hour = int(hour_str) if hour_str else 12

            result.timex = 'T' + DateTimeFormatUtil.to_str(hour, 2)
            result.future_value = result.past_value = DateUtils.safe_create_from_min_value(
                reference.year, reference.month, reference.day, hour, 0, 0)
            result.success = True

        return result
Exemple #26
0
    def _merge_two_times_points(self, source: str, reference: datetime) -> DateTimeResolutionResult:
        """Build a date range from the first two dates found in ``source``.

        Dates are extracted from the stripped text; if fewer than two are
        found, extraction is retried with ``self.config.token_before_date``
        prepended and the resulting offsets are shifted back. When
        ``week_with_week_day_range_regex`` yields a ``week`` group, it is
        prefixed to the text of the first two extractions before parsing.

        :param source: Text to parse.
        :param reference: Date the expression is relative to.
        :return: Result whose timex is ``(begin,end,P<n>D)`` and whose
            future/past values are ``[begin, end]`` lists; an empty result
            when two dates cannot be extracted and parsed.
        """
        text = source.strip()
        outcome = DateTimeResolutionResult()
        extracted = self.config.date_extractor.extract(text, reference)

        if not extracted or len(extracted) < 2:
            lead = self.config.token_before_date
            extracted = self.config.date_extractor.extract(lead + text, reference)

            # Undo the offset introduced by the prepended token.
            for item in extracted:
                item.start -= len(lead)

            if not extracted or len(extracted) < 2:
                return outcome

        week_match = self.config.week_with_week_day_range_regex.search(source)
        week_prefix = RegExpUtility.get_group(week_match, 'week') if week_match else None
        if week_prefix:
            for item in extracted[:2]:
                item.text = f'{week_prefix} {item.text}'

        parsed = [
            pr
            for pr in (self.config.date_parser.parse(item, reference) for item in extracted)
            if pr
        ]
        if len(parsed) < 2:
            return outcome

        first, second = parsed[0], parsed[1]
        future_pair = [first.value.future_value, second.value.future_value]
        past_pair = [first.value.past_value, second.value.past_value]
        span_days = (future_pair[1] - future_pair[0]).days

        outcome.sub_date_time_entities = parsed
        outcome.timex = f'({first.timex_str},{second.timex_str},P{span_days}D)'
        outcome.future_value = future_pair
        outcome.past_value = past_pair
        outcome.success = True

        return outcome
    def extend_with_week_day_and_year(self, start_index: int, end_index: int, month: int,
                                      day: int, text: str, reference: datetime):
        """Widen a month/day span so it also covers an adjacent year and weekday.

        A year is looked for after the span and, if none was found there and
        ``self.config.check_both_before_after`` is set, before it. A weekday
        is looked for before the span (``week_day_end``) and otherwise after
        it (``week_day_start``); it is absorbed only when it names the same
        weekday as the date built from the reference year, ``month`` and
        ``day``.

        :param start_index: Start of the month/day span in ``text``.
        :param end_index: End (exclusive) of the month/day span in ``text``.
        :param month: Month of the extracted date.
        :param day: Day of the extracted date.
        :param text: Full text being processed.
        :param reference: Date supplying the year used for the weekday check.
        :return: ``(start_index, end_index)`` of the possibly widened span.
        """
        from .utilities import DateUtils
        import calendar

        year = reference.year

        # Check whether there's a year
        suffix = text[end_index:]
        prefix = text[0: start_index]
        year_index, success = self.get_year_index(suffix, year, False)
        end_index += year_index

        # Check also in prefix
        if not success and self.config.check_both_before_after:
            # Look at the text BEFORE the span this time; repeating the suffix
            # lookup could never succeed.
            # NOTE(review): the third argument is assumed to be the
            # "affix is a prefix" flag of get_year_index - confirm.
            year_index, success = self.get_year_index(prefix, year, True)
            start_index -= year_index

        # NOTE(review): presumably yields DateUtils.min_value for an invalid
        # year/month/day combination - see the comparison below.
        date = DateUtils.safe_create_from_value(DateUtils.min_value, year, month, day)

        # Check whether there's a weekday, first before then after the span
        is_match_in_suffix = False
        match_week_day = self.config.week_day_end.match(prefix)

        if not match_week_day:
            match_week_day = self.config.week_day_start.match(suffix)
            is_match_in_suffix = bool(match_week_day)

        if match_week_day:
            # Get weekday from context directly, compare it with the weekday extraction above
            # to see whether they reference the same weekday
            extracted_week_day_str = RegExpUtility.get_group(
                match_week_day, Constants.WEEKDAY_GROUP_NAME)
            num_week_day_str = calendar.day_name[date.weekday()].lower()
            week_day_1 = self.config.day_of_week.get(num_week_day_str)
            week_day_2 = self.config.day_of_week.get(extracted_week_day_str)

            # "is not None" so that a weekday mapped to 0 still counts as found.
            if week_day_1 is not None and week_day_2 is not None \
                    and date != DateUtils.min_value and week_day_1 == week_day_2:
                if not is_match_in_suffix:
                    start_index = match_week_day.start()
                else:
                    end_index += match_week_day.end()

        return start_index, end_index
    def extend_with_week_day_and_year(self, start_index: int, end_index: int,
                                      month: int, day: int, text: str,
                                      reference: datetime):
        """Widen a month/day span so it also covers a following year and a leading weekday.

        A year is accepted when ``self.config.year_suffix`` matches directly
        after the span and its value lies within ``Constants.MIN_YEAR_NUM`` ..
        ``Constants.MAX_YEAR_NUM``. A weekday matched by
        ``self.config.week_day_end`` in the text before the span is absorbed
        only when it names the same weekday as the date built from the year,
        ``month`` and ``day``.

        :param start_index: Start of the month/day span in ``text``.
        :param end_index: End (exclusive) of the month/day span in ``text``.
        :param month: Month of the extracted date.
        :param day: Day of the extracted date.
        :param text: Full text being processed.
        :param reference: Date supplying the default year.
        :return: ``(start_index, end_index)`` of the possibly widened span.
        """
        from .abstract_year_extractor import AbstractYearExtractor
        from .utilities import DateUtils
        import calendar
        year = reference.year

        # Check whether there's a year
        suffix = text[end_index:]
        match_year = self.config.year_suffix.match(suffix)

        if match_year and match_year.start() == 0:

            year = AbstractYearExtractor.get_year_from_text(self, match_year)

            if Constants.MIN_YEAR_NUM <= year <= Constants.MAX_YEAR_NUM:
                end_index += len(match_year.group())

        # NOTE(review): presumably yields DateUtils.min_value for an invalid
        # year/month/day combination - see the comparison below.
        date = DateUtils.safe_create_from_value(DateUtils.min_value, year,
                                                month, day)

        # Check whether there's a weekday
        prefix = text[:start_index]
        match_week_day = self.config.week_day_end.match(prefix)

        if match_week_day:
            # Get weekday from context directly, compare it with the weekday extraction above
            # to see whether they reference the same weekday
            extracted_week_day_str = RegExpUtility.get_group(
                match_week_day, Constants.WEEKDAY_GROUP_NAME)
            num_week_day_str = calendar.day_name[date.weekday()].lower()

            week_day_1 = self.config.day_of_week.get(num_week_day_str)
            week_day_2 = self.config.day_of_week.get(extracted_week_day_str)

            # "is not None" so that a weekday mapped to 0 still counts as found.
            if week_day_1 is not None and week_day_2 is not None \
                    and date != DateUtils.min_value and week_day_1 == week_day_2:
                # Pull the span start back to where the weekday begins so the
                # weekday text becomes part of the span.
                start_index = match_week_day.start()

        return start_index, end_index
Exemple #29
0
    def parse_ish(self, source: str,
                  reference: datetime) -> DateTimeResolutionResult:
        """Resolve an approximate-hour expression to an hour on the reference day.

        The stripped, lower-cased text must be matched in full by
        ``self.config.ish_regex``. The hour comes from the ``hour`` group and
        defaults to 12 when that group is empty.

        :param source: Text to parse.
        :param reference: Date supplying year, month and day of the result.
        :return: Result with timex ``T<hh>`` and identical future/past values
            on success; an empty result otherwise.
        """
        outcome = DateTimeResolutionResult()
        text = source.strip().lower()
        found = regex.search(self.config.ish_regex, text)

        if found is not None and found.group() == text:
            hour_text = RegExpUtility.get_group(found, 'hour')
            hour = int(hour_text) if hour_text else 12

            outcome.timex = f'T{hour:02d}'
            moment = datetime(reference.year, reference.month,
                              reference.day, hour, 0, 0)
            outcome.future_value = moment
            outcome.past_value = moment
            outcome.success = True

        return outcome
Exemple #30
0
    def parse_each_unit(self, source: str) -> DateTimeResolutionResult:
        """Resolve an "each <unit>" expression into a recurring-set result.

        ``self.config.each_unit_regex`` must match a span as long as the whole
        of ``source``, the ``unit`` group must be a key of
        ``self.config.unit_map``, and ``get_matched_unit_timex`` must report a
        match. Any failed condition yields an empty result.

        :param source: Text to parse.
        :return: Result with the unit's timex and ``'Set: ' + timex`` as both
            future and past value on success; an empty result otherwise.
        """
        outcome = DateTimeResolutionResult()

        found = self.config.each_unit_regex.search(source)
        if found and (found.end() - found.start()) == len(source):
            unit = RegExpUtility.get_group(found, 'unit')
            if unit and unit in self.config.unit_map:
                unit_timex = self.config.get_matched_unit_timex(unit)
                if unit_timex.matched:
                    outcome.timex = unit_timex.timex
                    set_value = 'Set: ' + outcome.timex
                    outcome.future_value = set_value
                    outcome.past_value = set_value
                    outcome.success = True

        return outcome