def test_match_digi_clock():
    """Check that digital clock times (H:MM / HH:MM) are extracted from free text."""
    fn = 'digi_clock'
    cases = [
        ('kedden 8:45', [[Hour(8, fn), Minute(45, fn)]]),
        ('kedden 08:45', [[Hour(8, fn), Minute(45, fn)]]),
        ('ma este 18:12-kor', [[Hour(18, fn), Minute(12, fn)]]),
    ]

    for text, expected in cases:
        # Only the extracted date parts are compared, not the raw match payload.
        found = [entry['date_parts'] for entry in match_digi_clock(text)]
        assert found == expected
Example #2
0
def match_digi_clock(s: str) -> List[Dict[str, Any]]:
    """
    Match digi clock format.

    Every occurrence of R_DIGI in the input yields one result entry. The
    non-empty captured fields are converted to integers; two fields are
    interpreted as hour and minute, a single field as hour only. Matches with
    any other number of fields are ignored.

    :param s: textual input
    :return: list of dicts, each with the parsed integers under 'match' and
        the corresponding Hour/Minute objects under 'date_parts'
    """
    res = []
    for raw_group in re.findall(R_DIGI, s):
        # int() copes with leading zeros on its own ('08' -> 8). Only empty
        # captures are skipped, so all-zero fields such as the '00' in '8:00'
        # or '00:45' are kept as 0 instead of being silently dropped.
        group = [int(m) for m in raw_group if m]

        if len(group) == 2:
            h, m = group
            res.append({
                'match': group,
                'date_parts': [Hour(h, 'digi_clock'),
                               Minute(m, 'digi_clock')]
            })
        elif len(group) == 1:
            res.append({
                'match': group,
                'date_parts': [Hour(group[0], 'digi_clock')]
            })

    return res
Example #3
0
def match_now(s: str, now: datetime) -> List[Dict[str, Any]]:
    """
    Match the standalone word 'most' (Hungarian for 'now').

    :param s: textual input
    :param now: reference datetime whose fields are copied into the result
    :return: empty list when match_weekday finds something in the input or
        when 'most' is absent; otherwise a single-entry list carrying the
        year, month, day, hour and minute of ``now``
    """
    # Any weekday match suppresses this rule entirely.
    if match_weekday(s, now):
        return []

    if re.match(r'.*\bmost\b.*', s.lower()) is None:
        return []

    fn = 'now'
    parts = [
        Year(now.year, fn),
        Month(now.month, fn),
        Day(now.day, fn),
        Hour(now.hour, fn),
        Minute(now.minute, fn),
    ]
    return [{'match': 'most', 'date_parts': parts}]
def test_match_now():
    """Check that 'most' resolves to the reference datetime unless a weekday is present."""
    fn = 'now'
    now = datetime(2020, 12, 30, 12, 1)
    full_timestamp = [
        Year(now.year, fn),
        Month(now.month, fn),
        Day(now.day, fn),
        Hour(now.hour, fn),
        Minute(now.minute, fn),
    ]
    cases = [
        ('kedden 8:45', []),
        ('most kedden', []),
        ('legyen most', [full_timestamp]),
    ]

    for text, expected in cases:
        found = [entry['date_parts'] for entry in match_now(text, now)]
        assert found == expected
Example #5
0
def match_n_periods_compared_to_now(s: str, now: datetime) -> List[Dict[str, Any]]:
    """
    Match expressions meaning "n weeks/days/hours/minutes from now".

    :param s: textual input
    :param now: reference datetime the offset is added to
    :return: one dict per regex hit, holding the raw regex group under 'match'
        and the shifted date parts under 'date_parts'. Week and day offsets
        yield year/month/day, hour offsets add the hour, minute offsets add
        hour and minute. 'date_parts' stays empty when the captured amount
        (group[1]) is empty.
    """
    # TODO: implement n-periods-before-now functionality
    fn = 'n_date_periods_compared_to_now'

    # (pattern, offset builder, how many leading fields of `fields` to emit)
    rules = (
        (R_NWEEKS_FROM_NOW, lambda n: timedelta(days=7*n), 3),
        (R_NDAYS_FROM_NOW, lambda n: timedelta(days=n), 3),
        (R_NHOURS_FROM_NOW, lambda n: timedelta(hours=n), 4),
        (R_NMINS_FROM_NOW, lambda n: timedelta(minutes=n), 5),
    )
    fields = (
        (Year, 'year'),
        (Month, 'month'),
        (Day, 'day'),
        (Hour, 'hour'),
        (Minute, 'minute'),
    )

    res = []
    for regex, make_offset, depth in rules:
        for group in re.findall(regex, s):
            entry = {'match': group, 'date_parts': []}

            amount = group[1]
            if amount:
                shifted = now + make_offset(word_to_num(amount))
                entry['date_parts'].extend(
                    [part(getattr(shifted, attr), fn)
                     for part, attr in fields[:depth]])

            res.append(entry)

    return res
    tf = [('kedden 8:45', [[Hour(8, fn), Minute(45, fn)]]),
          ('kedden 08:45', [[Hour(8, fn), Minute(45, fn)]]),
          ('ma este 18:12-kor', [[Hour(18, fn), Minute(12, fn)]])]

    for inp, exp in tf:
        out = match_digi_clock(inp)
        date_parts = []
        for e in out:
            date_parts.append(e['date_parts'])
        assert date_parts == exp


time_word_fn = 'time_words'
time_word_scenarios = [
    ('reggel nyolc előtt hat perccel',
     [[Hour(7, time_word_fn), Minute(54, time_word_fn)]]),
    ('reggel nyolc előtt nyolcvan perccel',
     [[Hour(6, time_word_fn), Minute(40, time_word_fn)]]),
    ('este 8 előtt 12 perccel',
     [[Hour(19, time_word_fn),
       Minute(48, time_word_fn)]]),
    ('nyolc óra nyolc perckor',
     [[Hour(8, time_word_fn), Minute(8, time_word_fn)]]),
    ('ma reggel hat óra', [[Hour(6, time_word_fn)]]),
    ('ma reggel', [[Daypart(1, time_word_fn)]]),
    ('ma délután háromkor', [[Hour(15, time_word_fn)]]),
    ('ma délután három után negyvenhat perckor',
     [[Hour(15, time_word_fn),
       Minute(46, time_word_fn)]]),
    ('ma délután haemdknc után negyvenhat perckor',
     [[Daypart(3, time_word_fn)]]), ('ma', []), ('ötvenöt perckor', []),
Example #7
0
def match_time_words(s: str) -> List[Dict[str, Any]]:
    """
    Match times written with words: daypart, hour, hour modifier and minute.

    The raw components come from _raw_match_time_words. When an hour is
    present the result holds an Hour (and a Minute if a minute or an hour
    modifier was matched); when only a daypart is present the result holds a
    Daypart (0 'hajnal', 1 'reggel', 2 'delelott', 3 'delutan', 4 'este',
    5 'ejjel'). Several guards return an empty list for inputs that look like
    dates, durations or otherwise false matches.

    :param s: textual input
    :return: list with at most one dict, holding the matched group under
        'match' and the extracted parts under 'date_parts'
    """
    parts = _raw_match_time_words(s)
    if not parts:
        return []
    else:
        group, daypart, hour_modifier, hour, minute = parts

    # Only numbers can match dates as well, this is an attempt to remove false matches
    hour_index = s.index(f'{hour}')
    before_hour = s[:hour_index].split()
    if before_hour:
        months = [
            'jan', 'feb', 'mar', 'apr', 'maj', 'jun', 'jul', 'aug', 'szep',
            'okt', 'nov', 'dec'
        ]
        # A month name right before the "hour" means the number is a day of month.
        for month in months:
            if month in remove_accent(before_hour[-1]):
                return []

    # Fix false time match for input 'jövő hét'
    if remove_accent(hour) == 'het':
        hour_indeces = [
            m.start() for m in re.finditer('het(?!fo)', remove_accent(s))
        ]
        if hour_indeces:
            before_hour = s[:hour_indeces[-1]].split()
            if before_hour:
                if 'jovo' in remove_accent(before_hour[-1]):
                    return []

    res = []
    # Hours are treated as a.m. unless an afternoon/evening/night daypart says otherwise.
    am = True
    date_parts = []

    if daypart and hour:
        if 'reggel' in daypart or 'delelott' in remove_accent(
                daypart) or 'hajnal' in daypart:
            am = True
        elif 'delutan' in remove_accent(
                daypart) or 'este' in daypart or 'ejjel' in remove_accent(
                    daypart):
            am = False

    if hour:

        # SKIP the whole matching rule when any of these apply
        # TODO: come up with a more elegant solution for this
        # TODO: i.e: by implementing the possibility of one rule exclude another
        # this is made redundant by the change in the patterns
        non_hours = ['ev', 'perc']
        for nh in non_hours:
            if f' {nh}' in remove_accent(hour) or remove_accent(
                    hour).startswith(nh):
                return []

        if 'mulva' in remove_accent(s):
            return []

        hour_num = word_to_num(hour)
        minute_num = word_to_num(minute)

        # Reject an unparseable hour before any arithmetic touches it: the
        # adjustments below would otherwise turn the NAN sentinel into a
        # plausible-looking hour and the guard would never fire.
        if hour_num == NAN:
            return []

        if not daypart:
            # default to business hour if daypart is not specified
            if hour_num < 8:
                hour_num += 12

        if hour_modifier:
            # Quarter/half/three-quarter modifiers count towards the named
            # hour, so the clock hour is the previous one.
            if 'haromnegyed' in remove_accent(hour_modifier):
                hour_num = hour_num - 1 if hour_num - 1 >= 0 else 23
                minute_num = 45
            elif 'fel' in remove_accent(hour_modifier):
                hour_num = hour_num - 1 if hour_num - 1 >= 0 else 23
                minute_num = 30
            elif 'negyed' in remove_accent(hour_modifier):
                hour_num = hour_num - 1 if hour_num - 1 >= 0 else 23
                minute_num = 15

        if hour_num < 12 and not am:
            hour_num += 12

        if minute or hour_modifier:
            # this is made redundant by the change in the patterns
            non_minutes = ['ev', 'ora']
            for nm in non_minutes:
                if f' {nm}' in remove_accent(minute) or remove_accent(
                        minute).startswith(nm):
                    return []

            if 'elott' in remove_accent(minute) and not hour_modifier:
                # "<minute_num> minutes before <hour_num>". A whole number of
                # hours must land on minute 0 of an earlier hour, not on
                # minute 60 of the hour before that.
                whole_hours, leftover = divmod(minute_num, 60)
                if leftover:
                    hour_num -= whole_hours + 1
                    minute_num = 60 - leftover
                else:
                    hour_num -= whole_hours
                    minute_num = 0
                hour_num = hour_num if hour_num >= 0 else 23
                date_parts.extend([
                    Hour(hour_num, 'time_words'),
                    Minute(minute_num, 'time_words')
                ])
            elif 'elott' in remove_accent(minute) and hour_modifier:
                # Minutes before a quarter/half/three-quarter mark.
                n_minutes_before = word_to_num(minute)
                if n_minutes_before != NAN:
                    minute_num -= n_minutes_before
                if minute_num < 0:
                    hour_num += (minute_num // 60)
                    hour_num = hour_num if hour_num >= 0 else 23
                    minute_num = minute_num % 60
                date_parts.extend([
                    Hour(hour_num, 'time_words'),
                    Minute(minute_num, 'time_words')
                ])
            elif hour_modifier:
                # Minutes after a quarter/half/three-quarter mark.
                n_minutes_after = word_to_num(minute)
                if n_minutes_after != NAN:
                    minute_num += n_minutes_after
                if minute_num > 59:
                    hour_num += (minute_num // 60)
                    hour_num = hour_num if hour_num <= 23 else 0
                    minute_num = minute_num % 60
                date_parts.extend([
                    Hour(hour_num, 'time_words'),
                    Minute(minute_num, 'time_words')
                ])
            else:
                date_parts.extend([
                    Hour(hour_num, 'time_words'),
                    Minute(minute_num, 'time_words')
                ])

        else:
            date_parts.append(Hour(hour_num, 'time_words'))

        res.append({'match': group, 'date_parts': date_parts})

    elif daypart:
        # No hour matched: report the daypart alone.
        if 'hajnal' in daypart:
            res.append({
                'match': group,
                'date_parts': [Daypart(0, 'time_words')]
            })
        elif 'reggel' in daypart:
            res.append({
                'match': group,
                'date_parts': [Daypart(1, 'time_words')]
            })
        elif 'delelott' in remove_accent(daypart):
            res.append({
                'match': group,
                'date_parts': [Daypart(2, 'time_words')]
            })
        elif 'delutan' in remove_accent(daypart):
            res.append({
                'match': group,
                'date_parts': [Daypart(3, 'time_words')]
            })
        elif 'este' in daypart:
            res.append({
                'match': group,
                'date_parts': [Daypart(4, 'time_words')]
            })
        elif 'ejjel' in remove_accent(daypart):
            res.append({
                'match': group,
                'date_parts': [Daypart(5, 'time_words')]
            })

    return res