def test_match_digi_clock():
    """Digital clock notations must yield the expected hour and minute parts."""
    source = 'digi_clock'
    cases = [
        ('kedden 8:45', [[Hour(8, source), Minute(45, source)]]),
        ('kedden 08:45', [[Hour(8, source), Minute(45, source)]]),
        ('ma este 18:12-kor', [[Hour(18, source), Minute(12, source)]]),
    ]

    for text, expected in cases:
        # Only the date parts of each hit are compared, not the raw match.
        found = [hit['date_parts'] for hit in match_digi_clock(text)]
        assert found == expected
def match_digi_clock(s: str) -> List[Dict[str, Any]]:
    """
    Match digital clock notations (the hits of R_DIGI) in the input.

    :param s: textual input
    :return: one dict per recognised hit, with the parsed numbers under
        'match' and the corresponding Hour (and Minute, when two numbers were
        captured) under 'date_parts'; hits with any other number of captured
        values are ignored
    """
    res = []
    for raw_group in re.findall(R_DIGI, s):
        # int() already accepts leading zeros ('08' -> 8, '00' -> 0). Only
        # empty captures are skipped, so a zero hour or minute is kept
        # instead of being silently dropped.
        group = [int(m) for m in raw_group if m]

        if len(group) == 2:
            h, m = group
            res.append({
                'match': group,
                'date_parts': [Hour(h, 'digi_clock'), Minute(m, 'digi_clock')]
            })
        elif len(group) == 1:
            res.append({
                'match': group,
                'date_parts': [Hour(group[0], 'digi_clock')]
            })

    return res
def match_now(s: str, now: datetime) -> List[Dict[str, Any]]:
    """
    Match the word 'most' (Hungarian for 'now') and resolve it to ``now``.

    :param s: textual input
    :param now: the moment whose year, month, day, hour and minute are reported
    :return: a one-element list holding the match and its date parts, or an
        empty list when match_weekday finds something in the input or the
        word 'most' does not occur
    """
    # When match_weekday reports a hit for this input, nothing is returned here.
    if match_weekday(s, now):
        return []

    if not re.match(r'.*\bmost\b.*', s.lower()):
        return []

    resolved = [
        Year(now.year, 'now'),
        Month(now.month, 'now'),
        Day(now.day, 'now'),
        Hour(now.hour, 'now'),
        Minute(now.minute, 'now'),
    ]
    return [{'match': 'most', 'date_parts': resolved}]
def test_match_now():
    """'most' resolves to the given moment unless the input yields no match."""
    now = datetime(2020, 12, 30, 12, 1)
    source = 'now'
    full_moment = [
        Year(now.year, source),
        Month(now.month, source),
        Day(now.day, source),
        Hour(now.hour, source),
        Minute(now.minute, source),
    ]
    cases = [
        ('kedden 8:45', []),
        ('most kedden', []),
        ('legyen most', [full_moment]),
    ]

    for text, expected in cases:
        # Only the date parts of each hit are compared, not the raw match.
        found = [hit['date_parts'] for hit in match_now(text, now)]
        assert found == expected
def match_n_periods_compared_to_now(s: str, now: datetime) -> List[Dict[str, Any]]:
    """
    Match "n weeks / days / hours / minutes from now" expressions.

    :param s: textual input
    :param now: the moment the offset is added to
    :return: one dict per regex hit with the raw hit under 'match' and the
        resolved parts under 'date_parts'. Week and day offsets resolve to
        year, month and day; hour offsets add the hour; minute offsets add
        the hour and the minute. 'date_parts' stays empty when the hit's
        second capture is empty.
    """
    # TODO: implement n-periods-before-now functionality
    fn = 'n_date_periods_compared_to_now'

    # (pattern, offset builder, how many of the part kinds below to emit)
    rules = [
        (R_NWEEKS_FROM_NOW, lambda count: timedelta(days=7*count), 3),
        (R_NDAYS_FROM_NOW, lambda count: timedelta(days=count), 3),
        (R_NHOURS_FROM_NOW, lambda count: timedelta(hours=count), 4),
        (R_NMINS_FROM_NOW, lambda count: timedelta(minutes=count), 5),
    ]
    # Ordered from coarsest to finest; each rule takes a prefix of this list.
    part_kinds = [
        (Year, 'year'),
        (Month, 'month'),
        (Day, 'day'),
        (Hour, 'hour'),
        (Minute, 'minute'),
    ]

    res = []
    for regex, make_offset, depth in rules:
        for group in re.findall(regex, s):
            entry = {'match': group, 'date_parts': []}

            raw_count = group[1]
            if raw_count:
                target = now + make_offset(word_to_num(raw_count))
                entry['date_parts'].extend(
                    [kind(getattr(target, attr), fn)
                     for kind, attr in part_kinds[:depth]]
                )

            res.append(entry)

    return res
tf = [('kedden 8:45', [[Hour(8, fn), Minute(45, fn)]]), ('kedden 08:45', [[Hour(8, fn), Minute(45, fn)]]), ('ma este 18:12-kor', [[Hour(18, fn), Minute(12, fn)]])] for inp, exp in tf: out = match_digi_clock(inp) date_parts = [] for e in out: date_parts.append(e['date_parts']) assert date_parts == exp time_word_fn = 'time_words' time_word_scenarios = [ ('reggel nyolc előtt hat perccel', [[Hour(7, time_word_fn), Minute(54, time_word_fn)]]), ('reggel nyolc előtt nyolcvan perccel', [[Hour(6, time_word_fn), Minute(40, time_word_fn)]]), ('este 8 előtt 12 perccel', [[Hour(19, time_word_fn), Minute(48, time_word_fn)]]), ('nyolc óra nyolc perckor', [[Hour(8, time_word_fn), Minute(8, time_word_fn)]]), ('ma reggel hat óra', [[Hour(6, time_word_fn)]]), ('ma reggel', [[Daypart(1, time_word_fn)]]), ('ma délután háromkor', [[Hour(15, time_word_fn)]]), ('ma délután három után negyvenhat perckor', [[Hour(15, time_word_fn), Minute(46, time_word_fn)]]), ('ma délután haemdknc után negyvenhat perckor', [[Daypart(3, time_word_fn)]]), ('ma', []), ('ötvenöt perckor', []),
def match_time_words(s: str) -> List[Dict[str, Any]]:
    """
    Match times of day written out in (Hungarian) words.

    The raw pieces come from _raw_match_time_words: the matched group, an
    optional daypart, an optional hour modifier (quarter / half /
    three-quarters), the hour and the minute expression.

    :param s: textual input
    :return: an empty list when nothing usable is found, otherwise a
        one-element list holding a dict with the raw hit under 'match' and
        under 'date_parts' either [Hour], [Hour, Minute] or, when only a
        daypart is present, [Daypart]
    """
    parts = _raw_match_time_words(s)

    if not parts:
        return []
    else:
        group, daypart, hour_modifier, hour, minute = parts

    # Only numbers can match dates as well, this is an attempt to remove false matches
    hour_index = s.index(f'{hour}')
    before_hour = s[:hour_index].split()
    if before_hour:
        months = [
            'jan', 'feb', 'mar', 'apr', 'maj', 'jun', 'jul', 'aug', 'szep',
            'okt', 'nov', 'dec'
        ]
        # A month name right before the "hour" means the number is a day of
        # the month, not an hour.
        for month in months:
            if month in remove_accent(before_hour[-1]):
                return []

    # Fix false time match for input 'jövő hét'
    # ('hét' means both "seven" and "week"; 'jövő hét' is "next week").
    if remove_accent(hour) == 'het':
        hour_indeces = [
            m.start() for m in re.finditer('het(?!fo)', remove_accent(s))
        ]
        if hour_indeces:
            before_hour = s[:hour_indeces[-1]].split()
            if before_hour:
                if 'jovo' in remove_accent(before_hour[-1]):
                    return []

    res = []
    am = True
    date_parts = []

    if daypart and hour:
        # Morning-like dayparts keep the hour as is, afternoon / evening /
        # night dayparts push hours below 12 into the second half of the day.
        if 'reggel' in daypart or 'delelott' in remove_accent(
                daypart) or 'hajnal' in daypart:
            am = True
        elif 'delutan' in remove_accent(
                daypart) or 'este' in daypart or 'ejjel' in remove_accent(
                    daypart):
            am = False

    if hour:
        # SKIP the whole matching rule when any of these apply
        # TODO: come up with a more elegant solution for this
        # TODO: i.e: by implementing the possibility of one rule exclude another
        # this is made redundant by the change in the patterns
        non_hours = ['ev', 'perc']
        for nh in non_hours:
            if f' {nh}' in remove_accent(hour) or remove_accent(
                    hour).startswith(nh):
                return []

        # Inputs containing 'múlva' are not handled by this rule.
        if 'mulva' in remove_accent(s):
            return []

        hour_num = word_to_num(hour)
        minute_num = word_to_num(minute)

        if not daypart:
            # default to business hour if daypart is not specified
            if hour_num < 8:
                hour_num += 12

        if hour_modifier:
            # Hungarian counts fractions towards the upcoming hour
            # ("fél nyolc" = 7:30), hence the step back by one hour.
            # 'haromnegyed' must be tested before 'negyed', which it contains.
            if 'haromnegyed' in remove_accent(hour_modifier):
                hour_num = hour_num - 1 if hour_num - 1 >= 0 else 23
                minute_num = 45
            elif 'fel' in remove_accent(hour_modifier):
                hour_num = hour_num - 1 if hour_num - 1 >= 0 else 23
                minute_num = 30
            elif 'negyed' in remove_accent(hour_modifier):
                hour_num = hour_num - 1 if hour_num - 1 >= 0 else 23
                minute_num = 15

        # NOTE(review): this comparison runs after hour_num may already have
        # been shifted above; confirm that an unparseable hour can still
        # equal NAN at this point.
        if hour_num == NAN:
            return []
        else:
            if hour_num < 12 and not am:
                hour_num += 12

        if minute or hour_modifier:
            # this is made redundant by the change in the patterns
            non_minutes = ['ev', 'ora']
            for nm in non_minutes:
                if f' {nm}' in remove_accent(minute) or remove_accent(
                        minute).startswith(nm):
                    return []

            if 'elott' in remove_accent(minute) and not hour_modifier:
                # "<hour> előtt <n> perccel": n minutes before the full hour.
                # divmod on the negated offset yields the hours to borrow
                # and a minute within 0..59, so an offset that is a whole
                # number of hours gives minute 0 instead of the invalid 60.
                hour_shift, minute_of_hour = divmod(-minute_num, 60)
                hour_num += hour_shift
                hour_num = hour_num if hour_num >= 0 else 23
                date_parts.extend([
                    Hour(hour_num, 'time_words'),
                    Minute(minute_of_hour, 'time_words')
                ])
            elif 'elott' in remove_accent(minute) and hour_modifier:
                # n minutes before the quarter / half / three-quarter mark.
                n_minutes_before = word_to_num(minute)
                if n_minutes_before != NAN:
                    minute_num -= n_minutes_before
                    if minute_num < 0:
                        hour_num += (minute_num // 60)
                        hour_num = hour_num if hour_num >= 0 else 23
                        minute_num = minute_num % 60
                date_parts.extend([
                    Hour(hour_num, 'time_words'),
                    Minute(minute_num, 'time_words')
                ])
            elif hour_modifier:
                # n minutes after the quarter / half / three-quarter mark.
                n_minutes_after = word_to_num(minute)
                if n_minutes_after != NAN:
                    minute_num += n_minutes_after
                    if minute_num > 59:
                        hour_num += (minute_num // 60)
                        hour_num = hour_num if hour_num <= 23 else 0
                        minute_num = minute_num % 60
                date_parts.extend([
                    Hour(hour_num, 'time_words'),
                    Minute(minute_num, 'time_words')
                ])
            else:
                date_parts.extend([
                    Hour(hour_num, 'time_words'),
                    Minute(minute_num, 'time_words')
                ])
        else:
            date_parts.append(Hour(hour_num, 'time_words'))

        res.append({'match': group, 'date_parts': date_parts})

    elif daypart:
        # No hour at all: report only the part of the day, numbered 0..5 in
        # the order dawn, morning, forenoon, afternoon, evening, night.
        if 'hajnal' in daypart:
            res.append({
                'match': group,
                'date_parts': [Daypart(0, 'time_words')]
            })
        elif 'reggel' in daypart:
            res.append({
                'match': group,
                'date_parts': [Daypart(1, 'time_words')]
            })
        elif 'delelott' in remove_accent(daypart):
            res.append({
                'match': group,
                'date_parts': [Daypart(2, 'time_words')]
            })
        elif 'delutan' in remove_accent(daypart):
            res.append({
                'match': group,
                'date_parts': [Daypart(3, 'time_words')]
            })
        elif 'este' in daypart:
            res.append({
                'match': group,
                'date_parts': [Daypart(4, 'time_words')]
            })
        elif 'ejjel' in remove_accent(daypart):
            res.append({
                'match': group,
                'date_parts': [Daypart(5, 'time_words')]
            })

    return res