Esempio n. 1
0
def parse_all(user_input: str, culture: str) -> List[List[ModelResult]]:
    """Run every supported recognizer over ``user_input``.

    Args:
        user_input: Free-form text to scan for entities.
        culture: Culture value passed unchanged to every recognizer call.

    Returns:
        One entry per recognizer call, always in the order number, ordinal,
        percentage, age, currency, dimension, temperature, datetime. Each
        entry is whatever that recognizer returned (a list of ``ModelResult``
        according to the Recognizers-Text API), so the overall value is a
        list of lists, not a flat list of results.
    """
    return [
        # Number recognizer - finds any number in the input.
        # E.g. "I have two apples" will return "2".
        Recognizers.recognize_number(user_input, culture),

        # Ordinal number recognizer - finds any ordinal number.
        # E.g. "eleventh" will return "11".
        Recognizers.recognize_ordinal(user_input, culture),

        # Percentage recognizer - finds any number presented as a percentage.
        # E.g. "one hundred percents" will return "100%".
        Recognizers.recognize_percentage(user_input, culture),

        # Age recognizer - finds any age expression.
        # E.g. "After ninety five years of age, perspectives change" will
        # return "95 Year".
        Recognizers.recognize_age(user_input, culture),

        # Currency recognizer - finds any currency amount.
        # E.g. "Interest expense in the 1988 third quarter was $ 75.3 million"
        # will return "75300000 Dollar".
        Recognizers.recognize_currency(user_input, culture),

        # Dimension recognizer - finds any dimension.
        # E.g. "The six-mile trip to my airport hotel that had taken 20
        # minutes earlier in the day took more than three hours." will
        # return "6 Mile".
        Recognizers.recognize_dimension(user_input, culture),

        # Temperature recognizer - finds any temperature.
        # E.g. "Set the temperature to 30 degrees celsius" will return "30 C".
        Recognizers.recognize_temperature(user_input, culture),

        # DateTime recognizer - finds any date or time, even when it is
        # written in colloquial language.
        # E.g. "I'll go back 8pm today" will return "2017-10-04 20:00:00".
        Recognizers.recognize_datetime(user_input, culture),
    ]
    def _parse_all_entities(user_input: str,
                            culture: str) -> List[List[Any]]:
        """
        This is the main method that does the entity extraction work.

        Every recognizer listed below is called once with ``user_input`` and
        ``culture``. The return value holds one entry per recognizer call, in
        the order the calls appear; each entry is whatever that recognizer
        returned (a list of results according to the Recognizers-Text API),
        so the overall value is a list of lists.

        For more details: https://github.com/Microsoft/Recognizers-Text/tree/master/Python#api-documentation
        """

        return [
            # Number recognizer - finds any number in the input.
            # E.g. "I have two apples" will return "2".
            Recognizers.recognize_number(user_input, culture),

            # Ordinal number recognizer - finds any ordinal number.
            # E.g. "eleventh" will return "11".
            Recognizers.recognize_ordinal(user_input, culture),

            # Percentage recognizer - finds any number presented as a
            # percentage.
            # E.g. "one hundred percents" will return "100%".
            Recognizers.recognize_percentage(user_input, culture),

            # Age recognizer - finds any age expression.
            # E.g. "After ninety five years of age, perspectives change" will
            # return "95 Year".
            Recognizers.recognize_age(user_input, culture),

            # Currency recognizer - finds any currency amount.
            # E.g. "Interest expense in the 1988 third quarter was $ 75.3
            # million" will return "75300000 Dollar".
            Recognizers.recognize_currency(user_input, culture),

            # Temperature recognizer - finds any temperature.
            # E.g. "Set the temperature to 30 degrees celsius" will return
            # "30 C".
            Recognizers.recognize_temperature(user_input, culture),

            # DateTime recognizer - finds any date or time, even when it is
            # written in colloquial language.
            # E.g. "I'll go back 8pm today" will return "2017-10-04 20:00:00".
            Recognizers.recognize_datetime(user_input, culture),

            # PhoneNumber recognizer - finds any phone number.
            # E.g. "My phone number is ( 19 ) 38294427."
            Recognizers.recognize_phone_number(user_input, culture),

            # Email recognizer - finds any email address.
            # E.g. "Please write to me at someone@example.com for more
            # information on task #A1"
            Recognizers.recognize_email(user_input, culture),
        ]
def time_delta(hour_minute, delta_time):
    """Shift a clock time by an (hours, minutes) offset, wrapping at midnight.

    Args:
        hour_minute: Text that either starts with a zero-padded ``HH:MM``
            time or is a free-form expression handed to the Chinese datetime
            recognizer.
        delta_time: Two-item sequence ``[hours, minutes]`` added to the
            parsed time; either item may be negative.

    Returns:
        The shifted time formatted as ``HH:MM`` on a 24-hour clock. When the
        input does not start with ``HH:MM`` and the recognizer does not
        resolve it to a ``time`` value, ``12:00`` is used as the base time.
    """
    match = re.match(r'[0-2][0-9]:[0-5][0-9]', hour_minute)
    if match:
        # Keep only the matched HH:MM prefix so trailing text (seconds,
        # units, ...) cannot break the split/int conversion below.
        hour_minute = match.group(0)
    else:
        result = Recognizers.recognize_datetime(hour_minute, Culture.Chinese)
        values = result[0].resolution['values'] if result else None
        if not values or values[0]['type'] != 'time':
            hour_minute = '12:00'
        else:
            # Only the first five characters (HH:MM) of the value are used.
            hour_minute = values[0]['value'][:5]

    hours_text, minutes_text = hour_minute.split(':')
    total_minutes = ((int(hours_text) + delta_time[0]) * 60
                     + int(minutes_text) + delta_time[1])
    # Wrapping the total modulo one day handles offsets of any size and sign,
    # not just a single carry or borrow.
    hours, minutes = divmod(total_minutes % (24 * 60), 60)
    return "%02d:%02d" % (hours, minutes)
def get_real_slot_value(slot_name, slot_value):
    """Normalise the raw text of a slot into a canonical string.

    The handling depends on ``slot_name``:

    * '评分' (rating), '价格' (price), '时长' (duration): numbers are taken
      from the text and rendered as a single value ``'x.xx'``, a closed range
      ``'[a, b]'``, a lower bound ``'[a, ∞)'`` or an upper bound
      ``'[0.0, b]'``, depending on qualifier words found in the text.
    * '准点率' (punctuality rate): rendered as a single value, a range or a
      lower bound; otherwise the raw text is returned.
    * '出发时间' / '到达时间' (departure / arrival time): resolved with the
      Chinese datetime recognizer to ``'HH:MM'``, a range, or a one-sided
      bound.
    * '时间' / '开始时间' / '结束时间' (time / start time / end time):
      resolved to ``'HH:MM'``.
    * '人数', '天数', '数量', '距离', 'choice' (people, days, quantity,
      distance, choice): the integer part of the first number found.

    Args:
        slot_name: Name of the slot, used to select the rule set above.
        slot_value: Raw slot text.

    Returns:
        The normalised string, or ``slot_value`` unchanged when the slot name
        is not handled or nothing usable could be extracted.
    """
    if slot_name == '评分':
        # Default rating used when the text holds no usable number.
        f_rating = f_rating1 = f_rating2 = 4.0
        b_about = False
        digit_items = c2d.takeNumberFromString(slot_value)['digitsStringList']
        if len(digit_items) == 0 or len(digit_items) > 2:
            return '[{:.2f}, ∞)'.format(f_rating)
        elif len(digit_items) == 1:
            f_rating = float(digit_items[0])
        else:
            # Two numbers: treat them as the two ends of a range.
            b_about = True
            f_rating = float(digit_items[0]), float(digit_items[1])
            f_rating1, f_rating2 = min(f_rating[0], f_rating[1]), max(
                f_rating[0], f_rating[1])
        if not b_about:
            f_rating = revise_value(f_rating, slot_value, b_check_half=True)
            # "at least / above" qualifiers -> lower bound.
            if re.search('最少|至少|以上|超过|超出', slot_value):
                return '[{:.2f}, ∞)'.format(f_rating)
            # "at most / below / within" qualifiers -> upper bound.
            if re.search('最多|最高|以下|不超过|以内|之内', slot_value):
                return '[0.0, {:.2f}]'.format(f_rating)
            # "about / roughly" qualifiers -> +/-5% band.
            if re.search('左右|上下|差不多|大概', slot_value):
                return '[{:.2f}, {:.2f}]'.format(f_rating * 0.95,
                                                 f_rating * 1.05)
            return '{:.2f}'.format(f_rating)
        else:
            # Split on a range separator so each side is revised with its
            # own surrounding text.
            obj = re.search('至|到|-|—', slot_value)
            if obj:
                raw_value1, raw_value2 = slot_value[:obj.start(
                )], slot_value[obj.end():]
                f_rating1 = revise_value(f_rating1,
                                         raw_value1,
                                         b_check_half=True)
                f_rating2 = revise_value(f_rating2,
                                         raw_value2,
                                         b_check_half=True)
            # Nearly equal ends collapse to their midpoint.
            if abs(f_rating1 - f_rating2) <= 0.01:
                return '{:.2f}'.format((f_rating1 + f_rating2) / 2)
            else:
                return '[{:.2f}, {:.2f}]'.format(f_rating1, f_rating2)

    if slot_name == '价格':
        f_price1 = f_price2 = 100
        b_about = False
        digit_items = c2d.takeNumberFromString(slot_value)['digitsStringList']
        if len(digit_items) == 0 or len(digit_items) > 2:
            return slot_value
        elif len(digit_items) == 1:
            f_price = abs(float(digit_items[0]))
        else:
            # Two numbers: treat them as the two ends of a range.
            b_about = True
            f_price = abs(float(digit_items[0])), abs(float(digit_items[1]))
            f_price1, f_price2 = min(f_price[0],
                                     f_price[1]), max(f_price[0], f_price[1])
        if not b_about:
            f_price = revise_value(f_price, slot_value)
            if re.search('最少|至少|以上|超过|超出', slot_value):
                return '[{:.2f}, ∞)'.format(f_price)
            if re.search('最多|最高|以下|不超过|以内|之内', slot_value):
                return '[0.0, {:.2f}]'.format(f_price)
            # "about / roughly" qualifiers -> +/-20% band.
            if re.search('左右|上下|差不多|大概', slot_value):
                return '[{:.2f}, {:.2f}]'.format(f_price * 0.8, f_price * 1.2)
            return '{:.2f}'.format(f_price)
        else:
            obj = re.search('至|到|-|—', slot_value)
            if obj:
                raw_value1, raw_value2 = slot_value[:obj.start(
                )], slot_value[obj.end():]
                f_price1 = revise_value(f_price1, raw_value1)
                f_price2 = revise_value(f_price2, raw_value2)
            if abs(f_price1 - f_price2) <= 1.0:
                return '{:.2f}'.format((f_price1 + f_price2) / 2)
            else:
                return '[{:.2f}, {:.2f}]'.format(f_price1, f_price2)

    if slot_name == '时长':
        f_hour1 = f_hour2 = 2
        b_about = False
        digit_items = c2d.takeNumberFromString(slot_value)['digitsStringList']
        if len(digit_items) == 0 or len(digit_items) > 2:
            # No usable number: only continue when the text mentions "half"
            # ('半'); revise_value is then called with b_check_half=True.
            if '半' in slot_value:
                f_hour = 0.
            else:
                return slot_value
        elif len(digit_items) == 1:
            f_hour = float(digit_items[0])
        elif re.search('[1-9]小时[1-9][0-9]分钟', slot_value):
            # "<h> hours <mm> minutes": fold the minutes into fractional hours.
            f_hour = float(digit_items[0]) + float(digit_items[1]) / 60.0
        else:
            # Two numbers: treat them as the two ends of a range.
            b_about = True
            f_hour = float(digit_items[0]), float(digit_items[1])
            f_hour1, f_hour2 = min(f_hour[0],
                                   f_hour[1]), max(f_hour[0], f_hour[1])
        if not b_about:
            f_hour = revise_value(f_hour,
                                  slot_value,
                                  b_check_half=True,
                                  b_time=True)
            if re.search('最少|至少|以上|超过|超出', slot_value):
                return '[{:.2f}, ∞)'.format(f_hour)
            if re.search('最多|最高|以下|不超过|以内|之内', slot_value):
                return '[0.0, {:.2f}]'.format(f_hour)
            if re.search('左右|差不多|大概', slot_value):
                return '[{:.2f}, {:.2f}]'.format(f_hour * 0.8, f_hour * 1.2)
            return '{:.2f}'.format(f_hour)
        else:
            obj = re.search('至|到|-|—', slot_value)
            if obj:
                raw_value1, raw_value2 = slot_value[:obj.start(
                )], slot_value[obj.end():]
                f_hour1 = revise_value(f_hour1,
                                       raw_value1,
                                       b_check_half=True,
                                       b_time=True)
                f_hour2 = revise_value(f_hour2,
                                       raw_value2,
                                       b_check_half=True,
                                       b_time=True)
            if abs(f_hour1 - f_hour2) <= 0.1:
                return '{:.2f}'.format((f_hour1 + f_hour2) / 2)
            else:
                return '[{:.2f}, {:.2f}]'.format(f_hour1, f_hour2)

    if slot_name == '准点率':
        obj = re.search('至|到|-|—', slot_value)
        # '至少' ("at least") contains the separator character '至' but is
        # not a range, so it is excluded here.
        if obj and '至少' not in slot_value:
            value1, value2 = slot_value[:obj.start()], slot_value[obj.end():]
            # The lower bound comes from the text before the separator and
            # the upper bound from the text after it.
            f_punctuality1, f_punctuality2 = retrieve_punctuality(
                value1), retrieve_punctuality(value2)
            # Values below 0.1 are treated as unusable; keep the raw text.
            if f_punctuality1 < 0.1 or f_punctuality2 < 0.1:
                return slot_value
            if abs(f_punctuality1 - f_punctuality2) <= 0.01:
                return '{:.2f}'.format((f_punctuality1 + f_punctuality2) / 2)
            else:
                return '[{:.2f}, {:.2f}]'.format(f_punctuality1,
                                                 f_punctuality2)
        f_punctuality = retrieve_punctuality(slot_value)
        if f_punctuality < 0.1:
            return slot_value
        if re.search('最低|至少|最少|以上|不低于', slot_value):
            return '[{:.2f}, ∞)'.format(f_punctuality)
        if re.search('大概|左右|上下|差不多', slot_value):
            return '[{:.2f}, {:.2f}]'.format(f_punctuality * 0.8,
                                             f_punctuality * 1.2)
        return slot_value

    if slot_name in ['出发时间', '到达时间']:
        result = Recognizers.recognize_datetime(slot_value, Culture.Chinese)
        # Only the last recognised entity is used.
        if not result or not result[-1].resolution['values']:
            return slot_value
        str_type = result[-1].resolution['values'][0]['type']
        if str_type == 'time':
            # Keep only the first five characters (HH:MM) of the value.
            time_str = result[-1].resolution['values'][0]['value'][:5]
            # "about / around" -> +/-10 minute window.
            if re.search('大概|左右|前后|差不多', slot_value):
                time_left, time_right = time_delta(time_str,
                                                   [0, -10]), time_delta(
                                                       time_str, [0, 10])
                return '[{}, {}]'.format(time_left, time_right)
            # "earliest / after" -> lower bound.
            if re.search('最早|后|以后|之后', slot_value):
                return '[{}, ∞)'.format(time_str)
            # "latest / before" -> upper bound.
            if re.search('最迟|最晚|前|以前|之前', slot_value):
                return '(-∞, {}]'.format(time_str)
            return time_str
        elif str_type == 'timerange':
            time_left = result[-1].resolution['values'][0]['start'][:5]
            time_right = result[-1].resolution['values'][0]['end'][:5]
            return '[{}, {}]'.format(time_left, time_right)
        return slot_value

    if slot_name in ['时间', '开始时间', '结束时间']:
        result = Recognizers.recognize_datetime(slot_value, Culture.Chinese)
        if not result or not result[-1].resolution['values'] or result[
                -1].resolution['values'][0]['type'] != 'time':
            return slot_value
        return result[-1].resolution['values'][0]['value'][:5]

    if slot_name in ['人数', '天数', '数量', '距离', 'choice']:
        # '两' ("two") in a choice slot is answered directly, before any
        # number extraction.
        if slot_name == 'choice' and '两' in slot_value:
            return '2'
        digit_items = c2d.takeNumberFromString(slot_value)['digitsStringList']
        if len(digit_items) == 0 or len(digit_items) > 2:
            return slot_value
        # Keep only the integer part of the first number.
        return str(digit_items[0]).split('.')[0]

    return slot_value