Example #1
0
def switch_to_particular_game_discussion(vars):
    """Tell whether the dialog should move on to discussing one particular game.

    The result is truthy when any of the following holds:

    * ``lets_talk_about`` matches the games pattern for the current state;
    * ``if_choose_topic`` fires and a game was found in the user text;
    * the bot text contains "?", the user text does not, and a game was
      found in the user text;
    * a game was found in the bot text, the bot text contains "?", and
      ``is_yes`` holds for the user utterance.

    Returns:
        The first truthy operand of the checks above, or the last operand
        when none is truthy (so not necessarily a strict ``bool``).
    """
    human_uttr = state_utils.get_last_human_utterance(vars)
    human_text = human_uttr.get("text", "").lower()
    bot_uttr = state_utils.get_last_bot_utterance(vars)
    bot_text = bot_uttr.get("text", "")

    # Log the raw search results first, then reduce each to a plain flag.
    games_in_human_text = find_games_in_text(human_text)
    logger.info(
        f"(switch_to_particular_game_discussion)found_video_game_in_user_uttr: {games_in_human_text}"
    )
    human_mentions_game = bool(games_in_human_text)

    games_in_bot_text = find_games_in_text(bot_text)
    logger.info(
        f"(switch_to_particular_game_discussion)found_video_game_in_bot_uttr: {games_in_bot_text}"
    )
    bot_mentions_game = bool(games_in_bot_text)

    topic_choice_names_game = (
        if_choose_topic(human_uttr, bot_uttr) and human_mentions_game
    )
    answer_names_game = (
        "?" not in human_text
        and "?" in bot_text
        and human_mentions_game
    )
    user_confirmed_bot_game = (
        bot_mentions_game
        and "?" in bot_text
        and is_yes(human_uttr)
    )

    flag = (
        lets_talk_about(vars, GAMES_WITH_AT_LEAST_1M_COPIES_SOLD_COMPILED_PATTERN)
        or topic_choice_names_game
        or answer_names_game
        or user_confirmed_bot_game
    )
    logger.info(f"switch_to_particular_game_discussion={flag}")
    return flag
Example #2
0
def lets_chat_about_movies(uttr, prev_uttr=None):
    """Return True when the user utterance asks to chat about movies.

    Args:
        uttr: current utterance dict; its "text" is matched against
            ``MOVIE_PATTERN`` after lower-casing.
        prev_uttr: previous utterance dict, or None (treated as ``{}``).

    Returns:
        True if ``if_chat_about_particular_topic`` fires for the movie
        pattern, or ``if_choose_topic`` fires and the utterance matches the
        pattern, or the utterance matches the pattern, has no "?" itself and
        the previous utterance contains "?". False otherwise.
    """
    if prev_uttr is None:
        prev_uttr = {}

    movie_match = re.search(MOVIE_PATTERN, uttr.get("text", "").lower())
    explicit_movie_request = if_chat_about_particular_topic(
        uttr, prev_uttr, compiled_pattern=MOVIE_PATTERN)
    picked_movies_as_topic = if_choose_topic(uttr, prev_uttr) and movie_match

    if explicit_movie_request or picked_movies_as_topic:
        return True

    # A movie-related statement given in reply to a question.
    movie_answer_to_question = (
        "?" not in uttr.get("text", "")
        and "?" in prev_uttr.get("text", "")
        and movie_match
    )
    return bool(movie_answer_to_question)
Example #3
0
def is_about_movies(uttr, prev_uttr=None):
    """Return True when the current turn is about movies.

    Args:
        uttr: current utterance dict; its "text" is matched against
            ``MOVIE_PATTERN`` after lower-casing.
        prev_uttr: previous utterance dict, or None (treated as ``{}``).
            Its last sentence is read from
            ``annotations -> sentseg -> segments``.

    Returns:
        True if any of these holds: ``if_chat_about_particular_topic`` fires
        for the movie pattern; ``if_choose_topic`` fires and the utterance
        matches the pattern; the utterance matches the pattern; the last
        sentence of the previous utterance contains "?" and matches the
        pattern. False otherwise.
    """
    prev_uttr = {} if prev_uttr is None else prev_uttr
    curr_uttr_is_about_movies = re.search(MOVIE_PATTERN,
                                          uttr.get("text", "").lower())

    # Fall back to a single empty sentence both when the annotation is
    # missing and when it is present but empty; indexing an empty list with
    # [-1] would otherwise raise IndexError.
    segments = prev_uttr.get("annotations", {}).get("sentseg", {}).get(
        "segments") or [""]
    prev_uttr_last_sent = segments[-1].lower()

    prev_uttr_is_about_movies = re.search(MOVIE_PATTERN, prev_uttr_last_sent)
    lets_talk_about_movies = if_chat_about_particular_topic(
        uttr, prev_uttr, compiled_pattern=MOVIE_PATTERN)
    chosed_topic = if_choose_topic(uttr,
                                   prev_uttr) and curr_uttr_is_about_movies

    return bool(
        lets_talk_about_movies or chosed_topic or curr_uttr_is_about_movies
        or ("?" in prev_uttr_last_sent and prev_uttr_is_about_movies))
Example #4
0
def switch_to_general_gaming_discussion(vars):
    """Tell whether the dialog should move on to a general talk about gaming.

    The result is truthy when any of the following holds:

    * ``lets_talk_about`` matches the video-game-words pattern;
    * ``if_choose_topic`` fires and the user text matches the pattern;
    * the bot text contains "?", the user text does not, and the user text
      matches the pattern;
    * ``is_yes`` holds for the user utterance and
      ``is_question_about_games`` holds for the bot text.

    Returns:
        The first truthy operand of the checks above, or the last operand
        when none is truthy (so not necessarily a strict ``bool``).
    """
    human_uttr = state_utils.get_last_human_utterance(vars)
    human_text = human_uttr.get("text", "").lower()
    bot_uttr = state_utils.get_last_bot_utterance(vars)
    bot_text = bot_uttr.get("text", "")

    human_uses_game_words = bool(
        VIDEO_GAME_WORDS_COMPILED_PATTERN.search(human_text))

    topic_choice_is_gaming = (
        if_choose_topic(human_uttr, bot_uttr) and human_uses_game_words
    )
    answer_uses_game_words = (
        "?" not in human_text
        and "?" in bot_text
        and human_uses_game_words
    )
    user_confirmed_game_question = (
        is_yes(human_uttr) and is_question_about_games(bot_text)
    )

    flag = (
        lets_talk_about(vars, VIDEO_GAME_WORDS_COMPILED_PATTERN)
        or topic_choice_is_gaming
        or answer_uses_game_words
        or user_confirmed_game_question
    )
    logger.info(f"switch_to_general_gaming_discussion={flag}")
    return flag
Example #5
0
    async def send(self, payload: Dict, callback: Callable):
        """Pick the skills that should answer the latest user utterance.

        Reads the first dialog state from ``payload["payload"]["states_batch"]``,
        runs a chain of rule-based checks over the last human and bot
        utterances, and schedules ``callback`` with the de-duplicated list of
        selected skill names.

        If anything raises, the exception is logged and sent to Sentry, and
        ``callback`` is scheduled with the fallback response
        ``["dff_program_y_skill", "dummy_skill"]``.

        Args:
            payload: dict providing ``"task_id"`` and
                ``"payload" -> "states_batch"``.
            callback: called with ``task_id`` and ``response`` keyword
                arguments; its result is passed to ``asyncio.create_task``
                (presumably a coroutine function -- confirm against caller).
        """
        # start time, used only for the execution-time log lines
        st_time = time.time()
        try:
            # only the first dialog state of the batch is processed
            dialog = payload["payload"]["states_batch"][0]

            skills_for_uttr = []
            user_uttr = dialog["human_utterances"][-1]
            user_uttr_text = user_uttr["text"].lower()
            user_uttr_annotations = user_uttr["annotations"]
            # last bot utterance, or {} when there are no bot utterances yet
            bot_uttr = dialog["bot_utterances"][-1] if len(
                dialog["bot_utterances"]) else {}
            bot_uttr_text_lower = bot_uttr.get("text", "").lower()
            prev_active_skill = bot_uttr.get("active_skill", "")

            intent_catcher_intents = get_intents(user_uttr,
                                                 probs=False,
                                                 which="intent_catcher")
            # True when a detected intent is listed for dff_intent_responder_skill
            high_priority_intent_detected = any([
                k for k in intent_catcher_intents
                if k in high_priority_intents["dff_intent_responder_skill"]
            ])
            low_priority_intent_detected = any([
                k for k in intent_catcher_intents if k in low_priority_intents
            ])

            detected_topics = set(get_topics(user_uttr, which="all"))

            # the factoid score must exceed 0.96 to count as a factoid question
            is_factoid = get_factoid(user_uttr).get("is_factoid", 0.0) > 0.96
            is_celebrity_mentioned = check_is_celebrity_mentioned(user_uttr)

            if_choose_topic_detected = if_choose_topic(user_uttr, bot_uttr)
            if_lets_chat_about_particular_topic_detected = if_chat_about_particular_topic(
                user_uttr, bot_uttr)

            # number of human utterances so far, including the current one
            dialog_len = len(dialog["human_utterances"])

            # Conditions under which a detected "exit" / "repeat" / "cant_do"
            # intent is overridden (reset to "not detected") in the loop below.
            exit_cond = "exit" in intent_catcher_intents and (
                dialog_len == 1 or
                (dialog_len == 2 and len(user_uttr_text.split()) > 3))
            repeat_cond = ("repeat" in intent_catcher_intents
                           and prev_active_skill in UNPREDICTABLE_SKILLS
                           and re.match(r"^what.?$", user_uttr_text))
            cant_do_cond = ("cant_do" in intent_catcher_intents
                            and "play" in user_uttr_text and any([
                                phrase in bot_uttr_text_lower
                                for phrase in GREETING_QUESTIONS_TEXTS
                            ]))
            for intent_name, condition in zip(
                ["exit", "repeat", "cant_do"],
                [exit_cond, repeat_cond, cant_do_cond]):
                if condition:
                    # NOTE(review): this clears the high-priority flag even if
                    # another high-priority intent was detected too -- confirm
                    # that this is intended.
                    high_priority_intent_detected = False
                    not_detected = {"detected": 0, "confidence": 0.0}
                    # overwrite the intent annotation in place, both on the
                    # last human utterance and on the last entry of
                    # dialog["utterances"]
                    user_uttr["annotations"]["intent_catcher"][
                        intent_name] = not_detected
                    dialog["utterances"][-1]["annotations"]["intent_catcher"][
                        intent_name] = not_detected

            if "/new_persona" in user_uttr_text:
                # process /new_persona command
                skills_for_uttr.append(
                    "personality_catcher"
                )  # TODO: rm crutch of personality_catcher
            elif user_uttr_text == "/get_dialog_id":
                # the /get_dialog_id command is answered by dummy_skill only
                skills_for_uttr.append("dummy_skill")
            elif high_priority_intent_detected:
                # process intent with corresponding IntentResponder
                skills_for_uttr.append("dff_intent_responder_skill")
            elif is_sensitive_topic_and_request(user_uttr):
                # process user utterance with sensitive content, "safe mode"

                # adding open-domain skills without opinion expression
                skills_for_uttr.append("dff_program_y_dangerous_skill")
                skills_for_uttr.append("meta_script_skill")
                skills_for_uttr.append("personal_info_skill")
                skills_for_uttr.append("factoid_qa")
                skills_for_uttr.append("dff_grounding_skill")
                skills_for_uttr.append("dummy_skill")
                skills_for_uttr.append("small_talk_skill")

                if if_lets_chat_about_particular_topic_detected:
                    skills_for_uttr.append("news_api_skill")
                if if_special_weather_turn_on(user_uttr, bot_uttr):
                    skills_for_uttr.append("dff_weather_skill")
                if is_celebrity_mentioned:
                    skills_for_uttr.append("dff_gossip_skill")

                # adding closed-domain skills
                # (restricted to the short allow-list passed below)
                skills_for_uttr += turn_on_skills(
                    detected_topics,
                    intent_catcher_intents,
                    user_uttr_text,
                    bot_uttr.get("text", ""),
                    available_skills=[
                        "news_api_skill",
                        "dff_coronavirus_skill",
                        "dff_funfact_skill",
                        "dff_weather_skill",
                        "dff_short_story_skill",
                    ],
                )
                # adding linked-to skills
                skills_for_uttr.extend(get_linked_to_skills(dialog))
                skills_for_uttr.extend(get_previously_active_skill(dialog))
            else:
                # general case
                if low_priority_intent_detected:
                    skills_for_uttr.append("dff_intent_responder_skill")
                # adding open-domain skills
                skills_for_uttr.append("dff_grounding_skill")
                skills_for_uttr.append("dff_program_y_skill")
                skills_for_uttr.append("personal_info_skill")
                skills_for_uttr.append("meta_script_skill")
                skills_for_uttr.append("dummy_skill")
                skills_for_uttr.append("dialogpt")  # generative skill
                skills_for_uttr.append("small_talk_skill")
                skills_for_uttr.append("knowledge_grounding_skill")
                skills_for_uttr.append("convert_reddit")
                skills_for_uttr.append("comet_dialog_skill")
                skills_for_uttr.append("dff_program_y_wide_skill")

                # adding friendship only in the beginning of the dialog
                if len(dialog["utterances"]) < 20:
                    skills_for_uttr.append("dff_friendship_skill")

                if if_choose_topic_detected or if_lets_chat_about_particular_topic_detected:
                    # knowledge_grounding_skill was already appended above; the
                    # duplicate is harmless because the final response is
                    # de-duplicated via set()
                    skills_for_uttr.append("knowledge_grounding_skill")
                    skills_for_uttr.append("news_api_skill")

                switch_wiki_skill, _ = if_switch_wiki_skill(
                    user_uttr, bot_uttr)
                if switch_wiki_skill or switch_wiki_skill_on_news(
                        user_uttr, bot_uttr):
                    skills_for_uttr.append("dff_wiki_skill")
                if if_switch_test_skill(user_uttr, bot_uttr):
                    skills_for_uttr.append("dff_art_skill")

                # adding factoidQA Skill if user utterance is factoid question
                if is_factoid:
                    skills_for_uttr.append("factoid_qa")

                # dummy_skill answered the previous turn and the dialog has
                # more than 4 utterances
                if "dummy_skill" in prev_active_skill and len(
                        dialog["utterances"]) > 4:
                    skills_for_uttr.append("dummy_skill_dialog")

                # if user mentions a celebrity
                if is_celebrity_mentioned:
                    skills_for_uttr.append("dff_gossip_skill")
                # some special cases
                if if_special_weather_turn_on(user_uttr, bot_uttr):
                    skills_for_uttr.append("dff_weather_skill")
                if if_turn_on_emotion(user_uttr, bot_uttr):
                    skills_for_uttr.append("emotion_skill")
                if get_named_locations(user_uttr):
                    skills_for_uttr.append("dff_travel_skill")
                if extract_movies_names_from_annotations(user_uttr):
                    skills_for_uttr.append("dff_movie_skill")

                # adding closed-domain skills
                skills_for_uttr += turn_on_skills(
                    detected_topics,
                    intent_catcher_intents,
                    user_uttr_text,
                    bot_uttr.get("text", ""),
                    available_skills=[
                        "dff_art_skill",
                        "dff_movie_skill",
                        "dff_book_skill",
                        "news_api_skill",
                        "dff_food_skill",
                        "dff_animals_skill",
                        "dff_sport_skill",
                        "dff_music_skill",
                        "dff_science_skill",
                        "dff_gossip_skill",
                        "game_cooperative_skill",
                        "dff_weather_skill",
                        "dff_funfact_skill",
                        "dff_travel_skill",
                        "dff_coronavirus_skill",
                        "dff_bot_persona_skill",
                        "dff_gaming_skill",
                        "dff_short_story_skill",
                    ],
                )
                # adding linked-to skills
                skills_for_uttr.extend(get_linked_to_skills(dialog))
                skills_for_uttr.extend(get_previously_active_skill(dialog))

            # NOW IT IS NOT ONLY FOR USUAL CONVERSATION BUT ALSO FOR SENSITIVE/HIGH PRIORITY INTENTS/ETC
            # (the post-processing below runs after every branch above)

            if "dff_coronavirus_skill" in skills_for_uttr:
                #  no convert & comet when about coronavirus
                # list.remove drops only the first occurrence of each name
                if "convert_reddit" in skills_for_uttr:
                    skills_for_uttr.remove("convert_reddit")
                if "comet_dialog_skill" in skills_for_uttr:
                    skills_for_uttr.remove("comet_dialog_skill")

            if len(dialog["utterances"]) > 1:
                # Use only misheard asr skill if asr is not confident and skip it for greeting
                # (a missing "asr" annotation defaults to "high" confidence)
                if user_uttr_annotations.get("asr",
                                             {}).get("asr_confidence",
                                                     "high") == "very_low":
                    skills_for_uttr = ["misheard_asr"]

            if "/alexa_" in user_uttr_text:
                # adding alexa handler for Amazon Alexa specific commands
                # (replaces every skill selected so far, including misheard_asr)
                skills_for_uttr = ["alexa_handler"]

            logger.info(f"Selected skills: {skills_for_uttr}")

            total_time = time.time() - st_time
            logger.info(f"rule_based_selector exec time = {total_time:.3f}s")
            # de-duplicate skill names before handing them to the callback;
            # set() does not preserve the order in which skills were added
            asyncio.create_task(
                callback(task_id=payload["task_id"],
                         response=list(set(skills_for_uttr))))
        except Exception as e:
            # best-effort fallback: log, report to Sentry, and still answer
            # the task with a fixed pair of skills
            total_time = time.time() - st_time
            logger.info(f"rule_based_selector exec time = {total_time:.3f}s")
            logger.exception(e)
            sentry_sdk.capture_exception(e)
            asyncio.create_task(
                callback(task_id=payload["task_id"],
                         response=["dff_program_y_skill", "dummy_skill"]))
Example #6
0
def tag_based_response_selection(dialog,
                                 candidates,
                                 scores,
                                 confidences,
                                 bot_utterances,
                                 all_prev_active_skills=None):
    all_prev_active_skills = all_prev_active_skills if all_prev_active_skills is not None else []
    all_prev_active_skills = Counter(all_prev_active_skills)
    annotated_uttr = dialog["human_utterances"][-1]
    all_user_intents, all_user_topics, all_user_named_entities, all_user_nounphrases = get_main_info_annotations(
        annotated_uttr)

    _is_switch_topic_request = is_switch_topic(annotated_uttr)
    _is_force_intent = any(
        [_intent in all_user_intents for _intent in FORCE_INTENTS_IC.keys()])
    # if user utterance contains any question (REGEXP & punctuation check!)
    _is_require_action_intent = is_any_question_sentence_in_utterance({
        "text":
        annotated_uttr.get("annotations",
                           {}).get("sentseg", {}).get("punct_sent",
                                                      annotated_uttr["text"])
    })
    # if user utterance contains any question AND requires some intent by socialbot
    _is_require_action_intent = _is_require_action_intent and any([
        _intent in all_user_intents
        for _intent in REQUIRE_ACTION_INTENTS.keys()
    ])
    _force_intents_detected = [
        _intent for _intent in FORCE_INTENTS_IC.keys()
        if _intent in all_user_intents
    ]
    # list of user intents which require some action by socialbot
    _require_action_intents_detected = [
        _intent for _intent in REQUIRE_ACTION_INTENTS.keys()
        if _intent in all_user_intents
    ]
    _force_intents_skills = sum([
        FORCE_INTENTS_IC.get(_intent, [])
        for _intent in _force_intents_detected
    ], [])
    # list of intents required by the socialbot
    _required_actions = sum([
        REQUIRE_ACTION_INTENTS.get(_intent, [])
        for _intent in _require_action_intents_detected
    ], [])
    _contains_entities = len(
        get_entities(annotated_uttr, only_named=False, with_labels=False)) > 0
    _is_active_skill_can_not_continue = False

    _prev_bot_uttr = dialog["bot_utterances"][-1] if len(
        dialog["bot_utterances"]) > 0 else {}
    _prev_active_skill = dialog["bot_utterances"][-1]["active_skill"] if len(
        dialog["bot_utterances"]) > 0 else ""
    _prev_prev_active_skill = dialog["bot_utterances"][-2][
        "active_skill"] if len(dialog["bot_utterances"]) > 1 else ""
    _no_script_two_times_in_a_row = False
    if _prev_active_skill and _prev_prev_active_skill:
        if all([
                skill not in ACTIVE_SKILLS + ALMOST_ACTIVE_SKILLS
                for skill in [_prev_active_skill, _prev_prev_active_skill]
        ]):
            _no_script_two_times_in_a_row = True
    disliked_skills = get_updated_disliked_skills(
        dialog, can_not_be_disliked_skills=CAN_NOT_BE_DISLIKED_SKILLS)

    _is_dummy_linkto_available = any([
        cand_uttr["skill_name"] == "dummy_skill"
        and cand_uttr.get("type", "") == "link_to_for_response_selector"
        for cand_uttr in candidates
    ])

    categorized_hyps = {}
    categorized_prompts = {}
    for dasuffix in ["reqda", ""]:
        for actsuffix in ["active", "continued", "finished"]:
            for suffix in [
                    "same_topic_entity_no_db",
                    "same_topic_entity_db",
                    "othr_topic_entity_no_db",
                    "othr_topic_entity_db",
            ]:
                categorized_hyps[f"{actsuffix}_{suffix}_{dasuffix}"] = []
                categorized_prompts[f"{actsuffix}_{suffix}_{dasuffix}"] = []

    CASE = ""
    acknowledgement_hypothesis = {}

    for cand_id, cand_uttr in enumerate(candidates):
        if confidences[cand_id] == 0.0 and cand_uttr[
                "skill_name"] not in ACTIVE_SKILLS:
            logger.info(
                f"Dropping cand_id: {cand_id} due to toxicity/badlists")
            continue

        all_cand_intents, all_cand_topics, all_cand_named_entities, all_cand_nounphrases = get_main_info_annotations(
            cand_uttr)
        skill_name = cand_uttr["skill_name"]
        _is_dialog_abandon = get_dialog_breakdown_annotations(
            cand_uttr) and PRIORITIZE_NO_DIALOG_BREAKDOWN
        _is_just_prompt = (cand_uttr["skill_name"] == "dummy_skill" and any([
            question_type in cand_uttr.get("type", "") for question_type in
            ["normal_question", "link_to_for_response_selector"]
        ])) or cand_uttr.get("response_parts", []) == ["prompt"]
        if cand_uttr["confidence"] == 1.0:
            # for those hypotheses where developer forgot to set tag to MUST_CONTINUE
            cand_uttr["can_continue"] = MUST_CONTINUE
        _can_continue = cand_uttr.get("can_continue", CAN_NOT_CONTINUE)

        _user_wants_to_chat_about_topic = (
            if_chat_about_particular_topic(annotated_uttr)
            and "about it" not in annotated_uttr["text"].lower())
        _user_does_not_want_to_chat_about_topic = if_not_want_to_chat_about_particular_topic(
            annotated_uttr)
        _user_wants_bot_to_choose_topic = if_choose_topic(
            annotated_uttr, _prev_bot_uttr)

        if any([
                phrase.lower() in cand_uttr["text"].lower()
                for phrase in LINK_TO_PHRASES
        ]):
            # add `prompt` to response_parts if any linkto phrase in hypothesis
            cand_uttr["response_parts"] = cand_uttr.get("response_parts",
                                                        []) + ["prompt"]

        # identifies if candidate contains named entities from last human utterance
        _same_named_entities = (len(
            get_common_tokens_in_lists_of_strings(
                all_cand_named_entities, all_user_named_entities)) > 0)
        # identifies if candidate contains all (not only named) entities from last human utterance
        _same_nounphrases = len(
            get_common_tokens_in_lists_of_strings(all_cand_nounphrases,
                                                  all_user_nounphrases)) > 0
        _same_topic_entity = (_same_named_entities or _same_nounphrases
                              ) and PRIORITIZE_WITH_SAME_TOPIC_ENTITY

        _is_active_skill = (_prev_active_skill == cand_uttr["skill_name"] or
                            cand_uttr.get("can_continue", "") == MUST_CONTINUE)
        _is_active_skill = _is_active_skill and skill_name in ACTIVE_SKILLS
        _is_active_skill = _is_active_skill and (_can_continue in [
            MUST_CONTINUE, CAN_CONTINUE_SCENARIO, CAN_NOT_CONTINUE
        ] or (_can_continue == CAN_CONTINUE_PROMPT
              and all_prev_active_skills.get(skill_name, []) < 10))
        _is_active_skill = _is_active_skill and PRIORITIZE_SCRIPTED_SKILLS
        if _is_active_skill:
            # we will forcibly add prompt if current scripted skill finishes scenario,
            # and has no opportunity to continue at all.
            _is_active_skill_can_not_continue = _is_active_skill and _can_continue in [
                CAN_NOT_CONTINUE
            ]

        if _is_force_intent:
            # =====force intents, choose as best_on_topic hypotheses from skills responding this request=====

            CASE = "Force intent."
            if cand_uttr["skill_name"] in _force_intents_skills:
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )

        elif _is_switch_topic_request or _user_does_not_want_to_chat_about_topic or _user_wants_bot_to_choose_topic:
            # =====direct request by user to switch the topic of current conversation=====
            # give priority to dummy linkto hypothesis if available, else other prompts if available.
            _is_active_skill = (
                cand_uttr.get("type", "") == "link_to_for_response_selector"
                if _is_dummy_linkto_available else _is_just_prompt)
            # no priority to must_continue to skip incorrect continuation of script
            _can_continue = CAN_CONTINUE_SCENARIO if _can_continue == MUST_CONTINUE else _can_continue

            CASE = "Switch topic intent."
            if len(all_user_named_entities) > 0 or len(
                    all_user_nounphrases) > 0:
                # -----user defines new topic/entity-----
                # _same_topic_entity does not depend on hyperparameter in these case
                _same_topic_entity = _same_named_entities or _same_nounphrases

                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
            else:
                # -----user want socialbot to define new topic/entity-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )

        elif _user_wants_to_chat_about_topic:
            # user wants to chat about particular topic

            CASE = "User wants to talk about topic."
            # in this case we do not give priority to previously active skill (but give to must continue skill!)
            # because now user wants to talk about something particular
            _is_active_skill = cand_uttr.get("can_continue",
                                             "") == MUST_CONTINUE
            # _same_topic_entity does not depend on hyperparameter in these case
            _same_topic_entity = _same_named_entities or _same_nounphrases

            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )

        elif _is_require_action_intent and PRIORITIZE_WITH_REQUIRED_ACT:
            # =====user intent requires particular action=====

            CASE = "User intent requires action. USER UTTERANCE CONTAINS QUESTION."
            _is_grounding_reqda = (skill_name == "dff_grounding_skill"
                                   and cand_uttr.get(
                                       "type", "") == "universal_response")
            _is_active_skill = cand_uttr.get(
                "can_continue",
                "") == MUST_CONTINUE  # no priority to prev active skill
            _can_continue = CAN_NOT_CONTINUE  # no priority to scripted skills

            if set(all_cand_intents).intersection(
                    set(_required_actions
                        )) or _is_grounding_reqda or _is_active_skill:
                # -----one of the can intent is in intents required by user-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=True,
                )
            else:
                # -----NO required dialog acts-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )

        else:
            # =====user intent does NOT require particular action=====

            CASE = "General case."
            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )

        # a bit of rule based help

        if (len(dialog["human_utterances"]) == 1
                and cand_uttr["skill_name"] == "dff_friendship_skill"
                and greeting_spec in cand_uttr["text"]):
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        elif (cand_uttr["skill_name"] == "dff_friendship_skill"
              and (how_are_you_spec in cand_uttr["text"]
                   or what_i_can_do_spec in cand_uttr["text"])
              and len(dialog["utterances"]) < 16):
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        # elif cand_uttr["skill_name"] == 'program_y_dangerous' and cand_uttr['confidence'] == 0.98:
        #     categorized_hyps = add_to_top1_category(cand_id, categorized_hyps, _is_require_action_intent)
        elif cand_uttr[
                "skill_name"] == "small_talk_skill" and is_sensitive_situation(
                    dialog["human_utterances"][-1]):
            # let small talk to talk about sex ^_^
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        elif cand_uttr["confidence"] >= 1.0:
            # -------------------- SUPER CONFIDENCE CASE HERE! --------------------
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)

        if cand_uttr["skill_name"] == "dff_grounding_skill" and [
                "acknowledgement"
        ] == cand_uttr.get("response_parts", []):
            acknowledgement_hypothesis = deepcopy(cand_uttr)

    logger.info(f"Current CASE: {CASE}")
    # now compute current scores as one float value
    curr_single_scores = compute_curr_single_scores(candidates, scores,
                                                    confidences)

    # remove disliked skills from hypotheses
    if IGNORE_DISLIKED_SKILLS:
        for category in categorized_hyps:
            new_ids = []
            for cand_id in categorized_hyps[category]:
                if (candidates[cand_id]["skill_name"] in disliked_skills
                        and candidates[cand_id].get("can_continue",
                                                    CAN_NOT_CONTINUE)
                        == MUST_CONTINUE):
                    disliked_skills.remove(candidates[cand_id]["skill_name"])
                if candidates[cand_id]["skill_name"] not in disliked_skills:
                    new_ids.append(cand_id)
            categorized_hyps[category] = deepcopy(new_ids)
        for category in categorized_prompts:
            new_ids = []
            for cand_id in categorized_prompts[category]:
                if (candidates[cand_id]["skill_name"] in disliked_skills
                        and candidates[cand_id].get("can_continue",
                                                    CAN_NOT_CONTINUE)
                        == MUST_CONTINUE):
                    disliked_skills.remove(candidates[cand_id]["skill_name"])
                if candidates[cand_id]["skill_name"] not in disliked_skills:
                    new_ids.append(cand_id)
            categorized_prompts[category] = deepcopy(new_ids)

    best_cand_id = pickup_best_id(categorized_hyps, candidates,
                                  curr_single_scores, bot_utterances)
    best_candidate = candidates[best_cand_id]
    best_candidate["human_attributes"] = best_candidate.get(
        "human_attributes", {})
    # save updated disliked skills to human attributes of the best candidate
    best_candidate["human_attributes"]["disliked_skills"] = disliked_skills
    logger.info(f"Best candidate: {best_candidate}")
    n_sents_without_prompt = len(sent_tokenize(best_candidate["text"]))
    _is_best_not_script = best_candidate[
        "skill_name"] not in ACTIVE_SKILLS + ALMOST_ACTIVE_SKILLS
    no_question_by_user = "******" not in dialog["human_utterances"][-1][
        "annotations"].get("sentseg",
                           {}).get("punct_sent",
                                   dialog["human_utterances"][-1]["text"])

    # if `no` to 1st in a row linkto question, and chosen response is not from scripted skill
    _no_to_first_linkto = is_no(dialog["human_utterances"][-1]) and any([
        phrase.lower() in _prev_bot_uttr.get("text", "").lower()
        for phrase in LINK_TO_PHRASES
    ])
    # if chosen short response or question by not-scripted skill
    _is_short_or_question_by_not_script = _is_best_not_script and (
        "?" in best_candidate["text"]
        or len(best_candidate["text"].split()) < 4)
    _no_questions_for_3_steps = not any([
        is_any_question_sentence_in_utterance(uttr)
        for uttr in dialog["bot_utterances"][-3:]
    ])

    if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS:
        if (_no_script_two_times_in_a_row
                and _is_short_or_question_by_not_script
                and no_question_by_user) or (_no_to_first_linkto
                                             and _is_best_not_script):
            # if no scripted skills 2 time sin a row before, current chosen best cand is not scripted, contains `?`,
            # and user utterance does not contain "?", replace utterance with dummy!
            best_prompt_id = pickup_best_id(categorized_prompts, candidates,
                                            curr_single_scores, bot_utterances)
            best_candidate = deepcopy(candidates[best_prompt_id])
            best_cand_id = best_prompt_id

    if does_not_require_prompt(candidates, best_cand_id):
        # the candidate already contains a prompt or a question or of a length more than 200 symbols
        logger.info(
            "Best candidate contains prompt, question, request or length of > 200 symbols. Do NOT add prompt."
        )
        pass
    elif sum(categorized_prompts.values(), []):
        # best cand is 3d times in a row not scripted skill, let's append linkto

        # need to add some prompt, and have a prompt
        _add_prompt_forcibly = best_candidate[
            "skill_name"] == _prev_active_skill and _is_active_skill_can_not_continue
        _add_prompt_forcibly = _add_prompt_forcibly and not _contains_entities

        # prompts are added:
        # - in 1 out of 10 cases, if current human utterance does not contain entities,
        # and no prompt for several last bot utterances
        # - if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and current utterance is from active on prev step scripted skill and
        # it has a status can-not-continue
        # - if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and last 2 bot uttr are not from scripted skill,
        # and current best uttr is also from not-scripted skill
        if ((prompt_decision() and not _contains_entities
             and _no_questions_for_3_steps)
                or (_add_prompt_forcibly and PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS)
                or
            (PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS
             and _no_script_two_times_in_a_row and _is_best_not_script)):
            logger.info("Decided to add a prompt to the best candidate.")
            best_prompt_id = pickup_best_id(categorized_prompts, candidates,
                                            curr_single_scores, bot_utterances)
            # as we have only one active skill, let's consider active skill as that one providing prompt
            # but we also need to reassign all the attributes
            best_prompt = candidates[best_prompt_id]
            best_candidate[
                "text"] = f'{best_candidate["text"]} {best_prompt["text"]}'
            best_candidate["attributes"] = best_candidate.get("attributes", {})
            best_candidate["attributes"]["prompt_skill"] = best_prompt

            # anyway we must combine used links
            best_candidate["human_attributes"] = best_candidate.get(
                "human_attributes", {})
            best_candidate["human_attributes"] = join_used_links_in_attributes(
                best_candidate["human_attributes"],
                best_prompt.get("human_attributes", {}))
            if len(best_candidate["human_attributes"]["used_links"]) == 0:
                best_candidate["human_attributes"].pop("used_links")

    was_ackn = if_acknowledgement_in_previous_bot_utterance(dialog)
    best_resp_cont_ackn = "acknowledgement" in best_candidate.get(
        "response_parts", [])

    if (ADD_ACKNOWLEDGMENTS_IF_POSSIBLE and acknowledgement_hypothesis
            and acknowledgement_decision(all_user_intents)
            and n_sents_without_prompt == 1 and not was_ackn
            and not best_resp_cont_ackn):
        logger.info(
            "Acknowledgement is given, Final hypothesis contains only 1 sentence, no ackn in prev bot uttr,"
            "and we decided to add an acknowledgement to the best candidate.")
        best_candidate[
            "text"] = f'{acknowledgement_hypothesis["text"]} {best_candidate["text"]}'
        best_candidate["response_parts"] = ["acknowledgement"
                                            ] + best_candidate.get(
                                                "response_parts", [])

    return best_candidate, best_cand_id, curr_single_scores
Example #7
0
def get_response_for_particular_topic_and_status(topic,
                                                 curr_meta_script_status,
                                                 dialog, source_topic):
    """Produce the meta-script reply for ``topic`` at the given script step.

    Args:
        topic: meta-script topic being discussed.
        curr_meta_script_status: script step to generate. "starting",
            "deeper1", "comment" and "opinion" get dedicated handling; any
            other value falls through to a statement phrase.
        dialog: dialog dict; the keys "human_utterances", "bot_utterances"
            and "utterances" are read here.
        source_topic: origin of the topic; only compared with ``NP_SOURCE``
            when picking the confidence of a starting phrase.

    Returns:
        tuple ``(response, confidence, attr)``. ``response`` is ``""`` with
        confidence ``0.0`` on the turns where the script decides to stay
        silent.
    """
    attr = {
        "meta_script_topic": topic,
        "meta_script_status": curr_meta_script_status
    }

    if len(dialog["human_utterances"]) > 0:
        user_uttr = dialog["human_utterances"][-1]
        text_user_uttr = user_uttr["text"].lower()
        # `or [""]` covers both a missing key and an empty `segments` list;
        # the latter would otherwise make the `[-1]` lookup raise IndexError.
        segments = user_uttr.get("annotations", {}).get(
            "sentseg", {}).get("segments") or [""]
        last_user_sent_text = segments[-1].lower()
    else:
        # no user utterance yet: fall back to an empty utterance
        user_uttr = {"text": ""}
        text_user_uttr = ""
        last_user_sent_text = ""

    if len(dialog["bot_utterances"]) > 0:
        bot_uttr = dialog["bot_utterances"][-1]
    else:
        bot_uttr = {}

    if curr_meta_script_status == "starting":
        response, confidence, attr = get_starting_phrase(dialog, topic, attr)
        attr["response_parts"] = ["prompt"]
        # use the already-resolved `user_uttr` (not a raw `[-1]` lookup) so
        # that an empty `human_utterances` list does not crash this branch
        can_offer_topic = if_choose_topic(user_uttr, bot_uttr)
        talk_about_user_topic = is_custom_topic(
            topic) and if_chat_about_particular_topic(user_uttr, bot_uttr)

        # outputs of recent turns whose response was one of the greeting
        # questions stored under the first key of GREETING_QUESTIONS
        first_greeting_key = list(GREETING_QUESTIONS.keys())[0]
        prev_what_to_talk_about_outputs = [
            get_outputs_with_response_from_dialog(dialog["utterances"][-3:],
                                                  response=greeting_question,
                                                  activated=True)
            for greeting_question in GREETING_QUESTIONS[first_greeting_key]
        ]
        greeting_question_was_asked = any(
            len(outputs) > 0 for outputs in prev_what_to_talk_about_outputs)
        prev_what_to_talk_about_greeting = (
            greeting_question_was_asked
            and bot_uttr.get("active_skill", "")
            in ["dff_friendship_skill", "program_y"])

        if (not prev_what_to_talk_about_greeting
                and can_offer_topic) or talk_about_user_topic:
            # if person wants to talk about something particular and we have extracted some topic - do that!
            confidence = MATCHED_DIALOG_BEGIN_CONFIDENCE
        elif "?" in last_user_sent_text or prev_what_to_talk_about_greeting:
            # if some question was asked by user, do not start script at all!
            response, confidence = "", 0.0
        elif len(dialog["utterances"]) <= 20:
            confidence = DEFAULT_DIALOG_BEGIN_CONFIDENCE
        elif source_topic == NP_SOURCE:
            confidence = NOUN_TOPIC_STARTING_CONFIDENCE
        else:
            confidence = DEFAULT_STARTING_CONFIDENCE
    else:
        if curr_meta_script_status == "deeper1" and "?" in last_user_sent_text and "what" not in text_user_uttr:
            # a user question without "what" right after the starting phrase:
            # stay silent on this turn
            response, confidence, attr = "", 0.0, {}
        elif "?" in last_user_sent_text and not check_topic_lemmas_in_sentence(
                text_user_uttr, topic):
            logger.info(
                "Question by user was detected. Without any word from topic in it. "
                "Don't continue the script on this turn.")
            response, confidence, attr = "", 0.0, {}
        elif is_switch_topic(user_uttr) or if_chat_about_particular_topic(
                user_uttr):
            logger.info("Topic switching was detected. Finish script.")
            response, confidence = FINISHED_SCRIPT_RESPONSE, 0.5
            attr["meta_script_status"] = FINISHED_SCRIPT
            attr["can_continue"] = CAN_NOT_CONTINUE
        elif get_user_replies_to_particular_skill(
                dialog["utterances"],
                "meta_script_skill")[-2:] == ["no.", "no."]:
            logger.info(
                "Two consequent `no` answers were detected. Finish script.")
            response, confidence = FINISHED_SCRIPT_RESPONSE, 0.5
            attr["meta_script_status"] = FINISHED_SCRIPT
            attr["can_continue"] = CAN_NOT_CONTINUE
        elif curr_meta_script_status == "comment":
            response, confidence, attr = get_comment_phrase(dialog, attr)
            attr["can_continue"] = CAN_NOT_CONTINUE
        elif curr_meta_script_status == "opinion":
            response, confidence, attr = get_opinion_phrase(
                dialog, topic, attr)
        elif curr_meta_script_status == "deeper1" and (
                is_no(user_uttr) or "never" in text_user_uttr):
            response, confidence = FINISHED_SCRIPT_RESPONSE, 0.5
            attr["meta_script_status"] = FINISHED_SCRIPT
            attr["can_continue"] = CAN_NOT_CONTINUE
        else:
            response, confidence, attr = get_statement_phrase(
                dialog, topic, attr, TOPICS)
            attr["can_continue"] = CAN_CONTINUE_SCENARIO

        if confidence > 0.7 and (is_yes(user_uttr)
                                 or len(text_user_uttr.split()) > 7):
            # if yes detected, confidence 1.0 - we like agreements!
            confidence = 1.0
        if confidence > 0.7 and bot_uttr.get("active_skill",
                                             "") != "meta_script_skill":
            # the previous bot turn was not produced by this skill, so the
            # script is being resumed after an interruption
            confidence = BROKEN_DIALOG_CONTINUE_CONFIDENCE

    logger.info(
        f"User sent: `{text_user_uttr}`. Response: `{response}`. Attr: `{attr}.`"
    )
    return response, confidence, attr
Example #8
0
def get_statuses_and_topics(dialog):
    """
    Find previously discussed meta-script topics and the last meta-script status,
    then determine the meta-script status(es) and topic(s) for the current step.

    Args:
        dialog: dialog itself; the keys "utterances", "human_utterances"
            and "bot_utterances" are read here

    Returns:
        tuple of three lists: candidate statuses, candidate topics and
        the sources of those topics (all three are empty when nothing
        should be started or continued on this step)
    """
    # deeper2 and opinion could be randomly skipped in dialog flow
    dialog_flow = ["starting", "deeper1", "deeper2", "opinion", "comment"]
    dialog_flow_user_topic = ["starting", "deeper1", "comment"]
    curr_meta_script_statuses = []
    curr_meta_script_topics = []
    source_topics = []

    if len(dialog["utterances"]) >= 3:
        # if dialog is not empty

        used_topics = get_used_attributes_by_name(
            dialog["utterances"],
            attribute_name="meta_script_topic",
            value_by_default="",
            activated=True)

        # this determines how many replies back we assume active meta script skill to continue dialog.
        # let's assume we can continue if meta_scrip skill was active on up to 2 steps back
        prev_reply_output = get_skill_outputs_from_dialog(
            dialog["utterances"][-5:],
            skill_name="meta_script_skill",
            activated=True)
        # get last meta script output even if it was not activated but right after it was active
        last_all_meta_script_outputs = get_skill_outputs_from_dialog(
            dialog["utterances"][-5:],
            skill_name="meta_script_skill",
            activated=False)
        prev_topic_finished = False
        for out in last_all_meta_script_outputs:
            if out.get("meta_script_status", "") == "finished":
                # log the topic that was finished (the status itself is
                # always "finished" here and carries no information)
                logger.info(
                    f"Found finished dialog on meta_script_topic: `{out.get('meta_script_topic', '')}`"
                )
                prev_topic_finished = True

        if len(prev_reply_output) > 0:
            # previously active skill was `meta_script_skill`
            curr_meta_script_status = prev_reply_output[-1].get(
                "meta_script_status", "")
        else:
            # previous active skill was not `meta_script_skill`
            curr_meta_script_status = ""
        logger.info(f"Found meta_script_status: `{curr_meta_script_status}`")

        if curr_meta_script_status in ["comment", "", FINISHED_SCRIPT
                                       ] or prev_topic_finished:
            # if previous meta script is finished (comment given) in previous bot reply
            # or if no meta script in previous reply or script was forcibly
            topics, curr_source_topics = get_not_used_topics(
                used_topics, dialog)
            # As written, topics extracted from utterances (any source other
            # than the predefined one) do not start a script on this path;
            # only predefined topics do, and only when `if_choose_topic`
            # fires on the last human / bot utterances.
            # len(utterances) >= 3, so presumably at least 1 bot utterance
            # exists for the `[-1]` lookup below.
            if curr_source_topics == [PREDEFINED_SOURCE] and if_choose_topic(
                    dialog["human_utterances"][-1],
                    dialog["bot_utterances"][-1]):
                # one of the predefined topics (wiki or hand-written)
                curr_meta_script_statuses += [dialog_flow[0]] * len(topics)
                curr_meta_script_topics += topics
                source_topics += curr_source_topics
        else:
            # some meta script is already in progress
            # we define it here as predefined because we do not care about this variable if it's not script starting
            source_topic = PREDEFINED_SOURCE
            curr_meta_script_topic = used_topics[-1]
            logger.info(
                f"Found meta_script_status: `{curr_meta_script_status}` "
                f"on previous meta_script_topic: `{curr_meta_script_topic}`")
            # getting the next dialog flow status
            if is_custom_topic(curr_meta_script_topic):
                curr_meta_script_status = dialog_flow_user_topic[
                    dialog_flow_user_topic.index(curr_meta_script_status) + 1]
            else:
                curr_meta_script_status = dialog_flow[
                    dialog_flow.index(curr_meta_script_status) + 1]

            # The order of the two checks matters: a "deeper2" that gets
            # skipped to "opinion" below is NOT re-rolled into "comment".
            if curr_meta_script_status == "opinion":
                # randomly skip the opinion step
                if uniform(0, 1) <= 0.5:
                    curr_meta_script_status = "comment"
            if curr_meta_script_status == "deeper2":
                # randomly skip the second deeper question
                if uniform(0, 1) <= 0.5:
                    curr_meta_script_status = "opinion"
            logger.info(f"New meta_script_status: `{curr_meta_script_status}` "
                        f"on meta_script_topic: `{curr_meta_script_topic}`")
            curr_meta_script_statuses += [curr_meta_script_status]
            curr_meta_script_topics += [curr_meta_script_topic]
            source_topics += [source_topic]
    else:
        # start of the dialog, pick up a topic of meta script
        curr_meta_script_topics, source_topics = get_not_used_topics([],
                                                                     dialog)
        # both flows begin with "starting"; the flow is still chosen by
        # topic source to keep the two lists the single source of truth
        if source_topics != [PREDEFINED_SOURCE]:
            curr_meta_script_statuses = [dialog_flow_user_topic[0]
                                         ] * len(curr_meta_script_topics)
        else:
            curr_meta_script_statuses = [dialog_flow[0]
                                         ] * len(curr_meta_script_topics)

    logger.info(f"Final new meta_script_status: `{curr_meta_script_statuses}` "
                f"on meta_script_topic: `{curr_meta_script_topics}`")
    return curr_meta_script_statuses, curr_meta_script_topics, source_topics
Example #9
0
def rule_score_based_selection(dialog, candidates, scores, confidences, is_toxics, bot_utterances):
    """Pick the best response candidate using hand-written rules plus scores.

    Each candidate gets a single float score: either a rule-assigned
    "very big" bonus plus its conversation-evaluator score, or a weighted
    sum of the evaluator score and the skill confidence. If any candidate
    has confidence >= 1.0, every candidate with a lower confidence and
    without the "very big" bonus is dropped to a very low score.

    Args:
        dialog: dialog dict; "human_utterances" and "utterances" are read.
        candidates: list of candidate dicts with "skill_name" and "text"
            (optionally "type", "human_attributes", "not sure"). The "text"
            of some candidates is modified in place (greeting prefix).
        scores: per-candidate evaluator scores, passed one by one to
            ``calculate_single_convers_evaluator_score``.
        confidences: per-candidate confidences, indexable and mutable;
            modified in place. May be a list or a 1-D NumPy array.
        is_toxics: per-candidate toxicity values; only logged here.
        bot_utterances: previous bot utterances (presumably their texts,
            since they are searched for "?" and counted for duplicates).

    Returns:
        tuple ``(best_candidate, best_id, curr_single_scores)`` where
        ``best_candidate`` is the result of ``add_question_to_statement``
        for the top-scored candidate, ``best_id`` is its index as returned
        by ``np.argmax`` and ``curr_single_scores`` is the list of final
        scores.
    """
    curr_single_scores = []

    bot_utt_counter = Counter(bot_utterances)
    lower_duplicates_score(candidates, bot_utt_counter, scores, confidences)
    lower_retrieve_skills_confidence_if_scenario_exist(candidates, scores, confidences)

    skill_names = [c["skill_name"] for c in candidates]

    very_big_score = 100
    very_low_score = -100
    dummy_question = ""
    dummy_question_human_attr = {}
    link_to_question = ""
    link_to_human_attrs = {}
    not_sure_factoid = False
    if "factoid_qa" in skill_names:
        factoid_cand = candidates[skill_names.index("factoid_qa")]
        logging.debug("factoid")
        logging.debug(str(factoid_cand))
        not_sure_factoid = bool(factoid_cand.get("not sure"))

    for i in range(len(scores)):
        cand = candidates[i]  # same dict object as candidates[i]; edits are in place
        skill = skill_names[i]
        curr_score = None
        is_misheard = misheard_with_spec1 in cand["text"] or misheard_with_spec2 in cand["text"]
        intent_name = get_intent_name(cand["text"])
        is_intent_candidate = (
            skill in ["dff_intent_responder_skill", "dff_program_y_skill"]
            and intent_name
            and intent_name not in low_priority_intents
        )

        if len(dialog["human_utterances"]) == 1 and greeting_spec not in cand["text"]:
            logger.info("Dialog Beginning detected.")
            first_uttr = dialog["utterances"][0]
            if if_chat_about_particular_topic(first_uttr) and "about it" not in first_uttr["text"].lower():
                logger.info("User wants to talk about particular topic")
                # if user says `let's chat about blablabla`
                if skill == "factoid_qa":
                    logger.info("Particular topic. Facts + Greeting to very big score.")
                    # strip the disclaimer before prepending the greeting
                    resp = cand["text"].replace("I don't have an opinion on that but I know some facts.", "")
                    cand["text"] = "Hi, " + greeting_spec + "! " + resp
                    curr_score = very_big_score
                elif skill == "meta_script_skill" and len(cand["text"]) > 0 and confidences[i] > 0.98:
                    logger.info("Particular topic. meta_script_skill + Greeting to very big score.")
                    resp = cand["text"]
                    cand["text"] = "Hi, " + greeting_spec + "! " + resp
                    curr_score = very_big_score
                elif skill == "small_talk_skill":
                    logger.info("Particular topic. Small-talk + Greeting NOT to very big score.")
                    # for now do not give small talk a very big score here
                    cand["text"] = "Hi, " + greeting_spec + "! " + cand["text"]
            elif if_choose_topic(first_uttr) and "about it" not in first_uttr["text"].lower():
                logger.info("User wants bot to choose the topic")
                # if user says `let's chat about something`
                if skill == "small_talk_skill":
                    logger.info("No topic. Small-talk + Greeting to very big score.")
                    cand["text"] = "Hi, " + greeting_spec + "! " + cand["text"]
                    curr_score = very_big_score
                elif skill == "meta_script_skill" and len(cand["text"]) > 0:
                    logger.info("No topic. Meta-script + Greeting to very big score.")
                    cand["text"] = "Hi, " + greeting_spec + "! " + cand["text"]
                    curr_score = very_big_score
            else:
                logger.info("User just wants to talk.")
                # if user says something else
                # NOTE(review): this check looks unreachable - the enclosing
                # branch requires `greeting_spec not in cand["text"]` and the
                # text is not modified on this path. Kept as is; confirm intent.
                if skill == "program_y" and greeting_spec in cand["text"]:
                    logger.info("Just chat. Program-y to very big score.")
                    curr_score = very_big_score
        elif (
            skill == "dff_friendship_skill"
            and (how_are_you_spec in cand["text"] or what_i_can_do_spec in cand["text"])
            and len(dialog["utterances"]) < 16
        ):
            curr_score = very_big_score
        elif skill == "dff_friendship_skill" and greeting_spec in cand["text"]:
            if len(dialog["utterances"]) < 2:
                curr_score = very_big_score
            else:
                confidences[i] = 0.2  # Low confidence for greeting in the middle of dialogue
        # we don't have 'cobotqa' anymore; instead we have factoid_qa
        elif skill in ["factoid_qa"] and "Here's something I found on the web." in cand["text"]:
            confidences[i] = 0.6
        elif (
            skill == "factoid_qa"
            and dialog["human_utterances"][-1]["annotations"]
            .get("intent_catcher", {})
            .get("weather_forecast_intent", {})
            .get("detected", 0)
            == 1
        ):
            confidences[i] = 0.8
        elif skill == "misheard_asr" and is_misheard:
            curr_score = very_big_score
        elif is_intent_candidate:
            curr_score = very_big_score
        # NOTE(review): "tdidf_retrieval" may be a typo of "tfidf_retrieval";
        # left untouched because the real skill name is not visible here.
        elif skill in ["dummy_skill", "convert_reddit", "alice", "eliza", "tdidf_retrieval", "program_y"]:
            if "question" in cand.get("type", "") or "?" in cand["text"]:
                # penalize questions from these skills when the bot has just
                # asked questions; program_y is only penalized later in dialog
                penalty_start_utt = 1
                if skill == "program_y":
                    penalty_start_utt = 4

                n_questions = 0
                if len(bot_utterances) >= penalty_start_utt and "?" in bot_utterances[-1]:
                    confidences[i] /= 1.5
                    n_questions += 1
                if len(bot_utterances) >= penalty_start_utt + 1 and "?" in bot_utterances[-2]:
                    confidences[i] /= 1.1
                    n_questions += 1
                if n_questions == 2:
                    # two subsequent questions (1 / (1.5 * 1.1 * 1.2) = ~0.5)
                    confidences[i] /= 1.2
            # this is only about `dummy_skill`
            if "link_to_for_response_selector" in cand.get("type", ""):
                link_to_question = cand["text"]
                link_to_human_attrs = cand.get("human_attributes", {})
        if skill == "dummy_skill" and "question" in cand.get("type", ""):
            dummy_question = cand["text"]
            dummy_question_human_attr = cand.get("human_attributes", {})

        cand_scores = scores[i]
        score_conv_eval = calculate_single_convers_evaluator_score(cand_scores)
        if curr_score is None:
            # no rule fired: mix evaluator score with (possibly adjusted) confidence
            confidence = confidences[i]
            score = CONV_EVAL_STRENGTH * score_conv_eval + CONFIDENCE_STRENGTH * confidence
            logger.info(
                f"Skill {skill} has final score: {score}. Confidence: {confidence}. "
                f"Toxicity: {is_toxics[i]}. Cand scores: {cand_scores}"
            )
        else:
            # a rule fired: the rule bonus replaces the confidence term
            score = CONV_EVAL_STRENGTH * score_conv_eval + curr_score
            logger.info(
                f"Skill {skill} has final score: {score}. " f"Toxicity: {is_toxics[i]}. Cand scores: {cand_scores}"
            )
        curr_single_scores.append(score)

    # element-wise check works for both plain lists and NumPy arrays
    # (`confidences >= 1.0` raises TypeError for a list)
    highest_conf_exist = any(conf >= 1.0 for conf in confidences)
    if highest_conf_exist:
        logger.info("Found skill with the highest confidence.")
        for j in range(len(candidates)):
            if confidences[j] < 1.0 and curr_single_scores[j] < very_big_score:
                # need to drop this candidates
                logger.info(f"Dropping {skill_names[j]} which does not have a highest confidence or `very big score`.")
                curr_single_scores[j] = very_low_score

    best_id = np.argmax(curr_single_scores)
    best_candidate = candidates[best_id]
    best_skill_name = skill_names[int(best_id)]

    best_candidate = add_question_to_statement(
        best_candidate,
        best_skill_name,
        dummy_question,
        dummy_question_human_attr,
        link_to_question,
        link_to_human_attrs,
        not_sure_factoid,
    )

    return best_candidate, best_id, curr_single_scores
Example #10
0
def respond():
    """Generate knowledge-grounded response hypotheses for a batch of dialogs.

    Reads ``request.json["dialogs"]``, collects knowledge samples for every
    dialog (annotator knowledge, retrieved facts, topic facts, news-api facts,
    random facts and news descriptions), sends all samples in a single request
    to ``KNOWLEDGE_GROUNDING_SERVICE_URL`` and assigns a confidence to each
    generated response.

    Returns:
        The ``jsonify``-ed list with one ``(responses, confidences, attributes)``
        triple per dialog. When no samples were collected, or when querying the
        service / scoring fails, every dialog gets the single empty hypothesis
        ``([""], [0.0], [{}])``.
    """
    print("response generation started")
    st_time = time.time()
    dialogs_batch = request.json["dialogs"]
    # following 3 lists have len = number of samples going to the model
    annotations_depths = []
    dial_ids = []
    input_batch = []
    # following 4 lists have len = len(dialogs_batch)
    entities = []
    lets_chat_about_flags = []
    nounphrases = []
    special_intents_flags = []
    chosen_topics = {}

    def add_sample(d_id, text, history, knowledge, depth, checked_sentence=None, **extra):
        # Register one model input together with its depth info and the id of the dialog it belongs to.
        sample = {
            "checked_sentence": knowledge if checked_sentence is None else checked_sentence,
            "knowledge": knowledge,
            "text": text,
            "history": history,
        }
        sample.update(extra)
        input_batch.append(sample)
        annotations_depths.append(depth)
        dial_ids.append(d_id)

    def empty_hypotheses():
        # One empty hypothesis per dialog, so the output always lines up with the input batch.
        return (
            [[""] for _ in dialogs_batch],
            [[0.0] for _ in dialogs_batch],
            [[{}] for _ in dialogs_batch],
        )

    for d_id, dialog in enumerate(dialogs_batch):
        try:
            user_input_text = dialog["human_utterances"][-1]["text"]
            last_human_uttr = dialog["human_utterances"][-1]
            bot_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) > 0 else {}
            switch_choose_topic = if_choose_topic(last_human_uttr, bot_uttr)
            # spacy_nounphrases
            spacy_nounphrases = get_spacy_nounphrases(last_human_uttr)
            nounphrases.append(
                re.compile(join_sentences_in_or_pattern(spacy_nounphrases), re.IGNORECASE)
                if spacy_nounphrases
                else ""
            )
            # entities
            curr_ents = get_named_entities(last_human_uttr)
            entities.append(
                re.compile(join_sentences_in_or_pattern(curr_ents), re.IGNORECASE) if curr_ents else ""
            )
            # intents
            lets_chat_about_flag, special_intents_flag = get_intents_flags(last_human_uttr)
            lets_chat_about_flags.append(lets_chat_about_flag)
            special_intents_flags.append(special_intents_flag)

            anntr_history_len = DEFAULT_ANNTR_HISTORY_LEN
            bot_uttrs_for_dff_check = dialog["bot_utterances"][-2:] if len(dialog["bot_utterances"]) > 1 else []
            dffs_flag = check_dffs(bot_uttrs_for_dff_check)
            if lets_chat_about_flag or switch_choose_topic:
                anntr_history_len = 0
            elif dffs_flag:
                anntr_history_len = DFF_ANNTR_HISTORY_LEN
            # if detected lets_chat is about topic from the file
            lets_chat_topic = get_lets_chat_topic(lets_chat_about_flag, last_human_uttr)
            # if prev skill == news_api_skill get news description and create knowledge fact
            news_api_fact = get_news_api_fact(
                bot_uttr, dialog["human_utterances"], not (switch_choose_topic or lets_chat_about_flag)
            )
            # start creating data for kg service
            user_input_history = "\n".join([i["text"] for i in dialog["utterances"]])

            annotators = {
                # "odqa": "answer_sentence",
                # "kbqa": "answer"
            }
            if not switch_choose_topic:
                user_input_knowledge, annotations_depth = get_knowledge_from_annotators(
                    annotators, dialog["utterances"], anntr_history_len
                )
            else:
                user_input_knowledge = ""
                annotations_depth = {}

            if user_input_knowledge:
                # add nounphrases and entities to the knowledge
                user_input_checked_sentence = (
                    space_join(spacy_nounphrases)
                    + space_join(curr_ents)
                    + tokenize.sent_tokenize(user_input_knowledge)[0]
                )
                add_sample(
                    d_id,
                    user_input_text,
                    user_input_history,
                    user_input_knowledge,
                    annotations_depth,
                    checked_sentence=user_input_checked_sentence,
                )

            retrieved_facts = get_annotations_from_dialog(
                dialog["utterances"][-anntr_history_len * 2 - 1:], "fact_retrieval"
            )
            if retrieved_facts:
                for depth, fact in retrieved_facts[-TOP_N_FACTS:]:
                    add_sample(d_id, user_input_text, user_input_history, fact, {"retrieved_fact": depth})

            if any([switch_choose_topic, lets_chat_topic, lets_chat_about_flag]):
                fact = ""
                _chosen_topic_fact = ""
                if lets_chat_topic:
                    fact = random.sample(TOPICS_FACTS[lets_chat_topic], 1)[0]
                    chosen_topics[d_id] = lets_chat_topic
                    _chosen_topic_fact = "lets_chat_cobot_da"
                elif not get_entities(last_human_uttr, only_named=False, with_labels=False):
                    # random.sample() rejects dict key views on Python 3.11+, so pick from a list of the keys.
                    topic = random.choice(list(TOPICS_FACTS.keys()))
                    fact = random.sample(TOPICS_FACTS[topic], 1)[0]
                    chosen_topics[d_id] = topic
                    _chosen_topic_fact = "switch_random"
                if fact:
                    add_sample(
                        d_id, user_input_text, user_input_history, fact, {}, chosen_topic_fact=_chosen_topic_fact
                    )

            if news_api_fact:
                add_sample(d_id, user_input_text, user_input_history, news_api_fact, {}, news_api_fact=True)

            fact_random_facts = get_fact_random(dialog["utterances"][-anntr_history_len * 2 - 1:])
            if fact_random_facts:
                add_sample(
                    d_id,
                    user_input_text,
                    user_input_history,
                    fact_random_facts[-1][1],
                    {"fact_random": fact_random_facts[-1][0]},
                    fact_random_fact=True,
                )

            user_news = get_news(last_human_uttr, "human")
            bot_news = get_news(last_human_uttr, "bot")
            # NOTE(review): the "decsription" key spelling is kept as-is; presumably it matches
            # what the news annotator emits - confirm before correcting it.
            # An additional "all" news source (carrying a "news_title" field) is currently disabled.
            news_desc = ""
            news_source = ""
            if user_news:
                news_desc = user_news[-1].get("decsription", "")
                news_source = "human "
            elif bot_news:
                news_desc = bot_news[-1].get("decsription", "")
                news_source = "bot "
            if news_desc:
                add_sample(d_id, user_input_text, user_input_history, news_desc, {}, news_fact=news_source)

        except Exception as ex:
            sentry_sdk.capture_exception(ex)
            logger.exception(ex)

        # A failure above may have happened before some of the per-dialog lists were filled;
        # pad them so that index i always refers to dialog i in the scoring loop below.
        for per_dialog_list, default in (
            (nounphrases, ""),
            (entities, ""),
            (lets_chat_about_flags, False),
            (special_intents_flags, False),
        ):
            if len(per_dialog_list) <= d_id:
                per_dialog_list.append(default)

    try:
        if not input_batch:
            responses, confidences, attributes = empty_hypotheses()
            logger.info(
                f"Collected no hypotheses, exiting with {list(zip(responses, confidences, attributes))}"
            )
            return jsonify(list(zip(responses, confidences, attributes)))

        logger.info(f"skill sends to service: {input_batch}")
        resp = requests.post(KNOWLEDGE_GROUNDING_SERVICE_URL, json={"batch": input_batch}, timeout=1.5)
        raw_responses = resp.json()
        logger.info(f"skill receives from service: {raw_responses}")

        dial_ids = np.array(dial_ids)
        attributes = []
        confidences = []
        responses = []

        for i, dialog in enumerate(dialogs_batch):
            curr_attributes = []
            curr_confidences = []
            curr_responses = []
            for curr_i in np.where(dial_ids == i)[0]:
                sample = input_batch[curr_i]
                raw_response = raw_responses[curr_i]
                attr = {
                    "knowledge_paragraph": sample["knowledge"],
                    "knowledge_checked_sentence": sample["checked_sentence"],
                    "can_continue": CAN_NOT_CONTINUE,
                    "confidence_case": "",
                }

                already_was_active, short_long_response = get_penalties(dialog["bot_utterances"], raw_response)
                curr_nounphrase_search = nounphrases[i].search(raw_response) if nounphrases[i] else False
                curr_entities_search = entities[i].search(raw_response) if entities[i] else False
                no_penalties = False
                fact_random_penalty = 0.0

                topic = chosen_topics.get(i, "")
                chosen_topic_fact_flag = sample.get("chosen_topic_fact", "")
                curr_news_fact = sample.get("news_fact", "")

                add_intro = ""
                if topic and chosen_topic_fact_flag:
                    add_intro = f"Okay, Let's chat about {topic}. "
                    confidence = HIGHEST_CONFIDENCE
                    no_penalties = True
                    attr["confidence_case"] += f"topic_fact: {chosen_topic_fact_flag} "
                    attr["response_parts"] = ["prompt"]
                elif sample.get("news_api_fact", ""):
                    add_intro = random.choice(
                        [
                            "Sounds like ",
                            "Seems like ",
                            "Makes sense. ",
                            # "Here's what I've heard: ", "Here's something else I've heard: ",
                            # trailing space added: the intro is concatenated directly with the response
                            "It reminds me that ",
                            "This comes to my mind: ",
                            "",
                        ]
                    )
                    no_penalties = True
                    confidence = HIGHEST_CONFIDENCE
                    attr["confidence_case"] += "news_api_fact "
                elif sample.get("fact_random_fact", ""):
                    fact_random_penalty = annotations_depths[curr_i].get("fact_random", 0.0)
                    confidence = DEFAULT_CONFIDENCE
                    attr["confidence_case"] += "fact_random_fact "
                elif curr_news_fact:
                    # NOTE(review): only "human " and "bot " are produced above, so the
                    # "all" branch is not reachable from this function as written.
                    if curr_news_fact != "all":
                        confidence = NOUNPHRASE_ENTITY_CONFIDENCE
                    else:
                        confidence = DEFAULT_CONFIDENCE
                        curr_news_title = sample.get("news_title", "")
                        if curr_news_title:
                            add_intro = f"I have just read that {curr_news_title}. "
                    attr["confidence_case"] += "news_fact: " + curr_news_fact
                elif (curr_nounphrase_search or curr_entities_search) and lets_chat_about_flags[i]:
                    confidence = HIGHEST_CONFIDENCE
                    attr["confidence_case"] += "nounphrase_entity_and_lets_chat_about "
                    attr["response_parts"] = ["prompt"]
                elif curr_nounphrase_search or curr_entities_search:
                    confidence = NOUNPHRASE_ENTITY_CONFIDENCE
                    attr["confidence_case"] += "nounphrase_entity "
                elif lets_chat_about_flags[i]:
                    confidence = LETS_CHAT_ABOUT_CONFIDENDENCE
                    attr["confidence_case"] += "lets_chat_about "
                    attr["response_parts"] = ["prompt"]
                else:
                    confidence = DEFAULT_CONFIDENCE
                    attr["confidence_case"] += "default "

                # The checks below override the confidence chosen above.
                acronym_flag = ABBRS.search(raw_response)
                if acronym_flag:
                    confidence = ABBRS_CONFIDENCE
                    attr["confidence_case"] += f"acronyms: {acronym_flag} "
                    logger.debug(f"KG skill: found acronyms: {acronym_flag}")
                special_char_flag = special_char_re.search(raw_response)
                if special_char_flag:
                    confidence = HAS_SPEC_CHAR_CONFIDENCE
                    attr["confidence_case"] += "special_char "
                    logger.debug(f"KG skill: found special_char: {special_char_flag}")
                if special_intents_flags[i]:
                    confidence = 0.0
                    attr["confidence_case"] += "special_intents "
                    logger.debug("KG skill: found special_intents")
                greetings_farewells_flag = greetings_farewells_re.search(raw_response)
                if greetings_farewells_flag:
                    confidence = 0.0
                    attr["confidence_case"] += "greetings_farewells "
                    logger.debug(f"KG skill: found greetings_farewells: {greetings_farewells_flag}")

                if no_penalties:
                    penalties = 0.0
                else:
                    penalties = (
                        annotations_depths[curr_i].get("retrieved_fact", 0.0)
                        + fact_random_penalty
                        + already_was_active
                        + short_long_response
                    )
                confidence -= penalties
                if any(
                    [
                        acronym_flag,
                        special_char_flag,
                        special_intents_flags[i],
                        greetings_farewells_flag,
                        short_long_response,
                    ]
                ):
                    logger.debug(f"KG skill: found penalties in response: {raw_response}, skipping it")
                    continue

                curr_attributes.append(attr)
                curr_confidences.append(max(0.0, confidence))
                # drop the whitespace the generator leaves before punctuation and inside "n ' t"
                curr_responses.append(
                    re.sub(r'\s([?.!",;:](?:\s|$))', r"\1", add_intro + raw_response).replace(" ' t", "'t")
                )
            attributes.append(curr_attributes)
            confidences.append(curr_confidences)
            responses.append(curr_responses)

    except Exception as ex:
        sentry_sdk.capture_exception(ex)
        logger.exception(ex)
        responses, confidences, attributes = empty_hypotheses()

    logger.info(f"knowledge_grounding_skill exec time: {time.time() - st_time}")
    return jsonify(list(zip(responses, confidences, attributes)))
Example #11
0
    async def send(self, payload: Dict, callback: Callable):
        """Collect dummy-skill candidates for the first dialog of the payload and pass them to ``callback``.

        A "don't know" answer is always offered. Depending on the dialog, a question or a fact
        sharing a nounphrase with the last user utterance and a link-to question may be added.

        Args:
            payload: task description; ``payload["payload"]`` holds ``dialogs`` and
                ``all_prev_active_skills`` (only their first items are used), ``payload["task_id"]``
                identifies the task.
            callback: coroutine function scheduled with ``task_id`` and ``response``; the response is
                ``[cands, confs, human_attrs, bot_attrs, attrs]`` or the raised exception.
        """
        try:
            started_at = time.time()
            dialog = deepcopy(payload["payload"]["dialogs"][0])
            is_sensitive_case = is_sensitive_situation(dialog["human_utterances"][-1])
            all_prev_active_skills = payload["payload"]["all_prev_active_skills"][0]

            curr_topics = get_topics(dialog["human_utterances"][-1], which="cobot_topics")
            curr_nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False)

            if len(curr_topics) == 0:
                curr_topics = ["Phatic"]
            logger.info(f"Found topics: {curr_topics}")

            # Clean every nounphrase in place; ignored phrases are blanked and filtered out right after.
            for idx, raw_phrase in enumerate(curr_nounphrases):
                cleaned = re.sub(rm_spaces_expr, " ", re.sub(np_remove_expr, "", raw_phrase))
                curr_nounphrases[idx] = "" if re.search(np_ignore_expr, cleaned) else cleaned.strip()
            curr_nounphrases = [phrase for phrase in curr_nounphrases if len(phrase) > 0]

            logger.info(f"Found nounphrases: {curr_nounphrases}")

            cands = []
            confs = []
            human_attrs = []
            bot_attrs = []
            attrs = []

            def add_candidate(text, conf, cand_type, human_attr_value):
                # Keep the five parallel output lists in sync.
                cands.append(text)
                confs.append(conf)
                attrs.append({"type": cand_type})
                human_attrs.append(human_attr_value)
                bot_attrs.append({})

            add_candidate(choice(donotknow_answers), 0.5, "dummy", {})

            if len(dialog["utterances"]) > 14 and not is_sensitive_case:
                questions_same_nps = []
                for nphrase in curr_nounphrases:
                    for q_id in NP_QUESTIONS.get(nphrase, []):
                        questions_same_nps.append(QUESTIONS_MAP[str(q_id)])

                if len(questions_same_nps) > 0:
                    logger.info("Found special nounphrases for questions. Return question with the same nounphrase.")
                    add_candidate(choice(questions_same_nps), 0.5, "nounphrase_question", {})

            link_to_question, human_attr = get_link_to_question(dialog, all_prev_active_skills)
            if link_to_question:
                bot_uttrs = dialog["bot_utterances"]
                human_uttrs = dialog["human_utterances"]
                prev_bot_text = bot_uttrs[-2]["text"] if len(bot_uttrs) > 1 else ""
                last_bot_text = bot_uttrs[-1]["text"] if len(bot_uttrs) > 0 else ""
                prev_active_skill = bot_uttrs[-1]["active_skill"] if len(bot_uttrs) > 0 else ""

                # User said "no" to the first link-to phrase in a row (not coming from dff_friendship_skill).
                declined_first_linkto = (
                    any([phrase in last_bot_text for phrase in LINK_TO_PHRASES])
                    and all([phrase not in prev_bot_text for phrase in LINK_TO_PHRASES])
                    and is_no(human_uttrs[-1])
                    and prev_active_skill != "dff_friendship_skill"
                )

                wants_switch_topic = is_switch_topic(human_uttrs[-1])
                last_bot_uttr = bot_uttrs[-1] if len(bot_uttrs) > 0 else {}
                wants_bot_to_choose = if_choose_topic(human_uttrs[-1], last_bot_uttr)
                asks_for_question = ASK_ME_QUESTION_PATTERN.search(human_uttrs[-1]["text"])

                was_cant_do = False
                was_cant_do_stop_it = False
                if len(human_uttrs) > 1:
                    was_cant_do = "cant_do" in get_intents(human_uttrs[-2]) and (
                        len(curr_nounphrases) == 0 or is_yes(human_uttrs[-1])
                    )
                    was_cant_do_stop_it = "cant_do" in get_intents(human_uttrs[-2]) and is_no(human_uttrs[-1])

                if was_cant_do_stop_it:
                    link_to_question = "Sorry, bye! #+#exit"
                    link_conf = 1.0  # finish dialog request
                elif declined_first_linkto:
                    link_conf = 0.99
                elif asks_for_question or wants_switch_topic or was_cant_do or wants_bot_to_choose:
                    link_conf = 1.0  # Use it only as response selector retrieve skill output modifier
                else:
                    link_conf = 0.05  # Use it only as response selector retrieve skill output modifier
                add_candidate(link_to_question, link_conf, "link_to_for_response_selector", human_attr)

            facts_same_nps = []
            for nphrase in curr_nounphrases:
                for fact_id in NP_FACTS.get(nphrase, []):
                    fact_text = FACTS_MAP[str(fact_id)]
                    follow_up = opinion_request_question() if random.random() < ASK_QUESTION_PROB else ""
                    facts_same_nps.append(
                        f"Well, now that you've mentioned {nphrase}, I've remembered this. {fact_text}. "
                        f"{follow_up}"
                    )

            if len(facts_same_nps) > 0 and not is_sensitive_case:
                logger.info("Found special nounphrases for facts. Return fact with the same nounphrase.")
                add_candidate(choice(facts_same_nps), 0.5, "nounphrase_fact", {})

            total_time = time.time() - started_at
            logger.info(f"dummy_skill exec time: {total_time:.3f}s")
            asyncio.create_task(
                callback(task_id=payload["task_id"], response=[cands, confs, human_attrs, bot_attrs, attrs])
            )
        except Exception as e:
            logger.exception(e)
            sentry_sdk.capture_exception(e)
            asyncio.create_task(callback(task_id=payload["task_id"], response=e))
Example #12
0
def pickup_topic_and_start_small_talk(dialog):
    """
    Choose a small talk topic for the current turn and build the opening reply.

    Three cases are tried in order: the bot picks the topic (user asked the bot to choose
    or to switch the topic), the user named a topic explicitly, or a topic word was found
    in the last user utterance.

    Args:
        dialog: dialog from agent

    Returns:
        Tuple of (response, topic, confidence); response and topic are empty strings and
        confidence is 0.0 when no topic could be picked up
    """
    user_uttr = dialog["human_utterances"][-1]
    bot_uttrs = dialog["bot_utterances"]
    # placeholder bot utterance for the very first turn
    bot_uttr = bot_uttrs[-1] if len(bot_uttrs) > 0 else {"text": "---", "annotations": {}}

    requested_topic = which_topic_lets_chat_about(user_uttr, bot_uttr)

    if if_choose_topic(user_uttr, bot_uttr) or if_switch_topic(user_uttr["text"].lower()):
        # user asks bot to chose topic: `pick up topic/what do you want to talk about/would you like to switch topic`
        # or bot asks user to chose topic and user says `nothing/anything/don't know`
        # if user asks to switch the topic
        topic = offer_topic(dialog)
        if topic not in TOPIC_PATTERNS:
            response, confidence = "", 0.0
        else:
            # "me"/"you" are named from the user's point of view, so the bot swaps them
            if topic == "me":
                opener = "Let's talk about you. "
            elif topic == "you":
                opener = "Let's talk about me. "
            else:
                opener = f"Let's talk about {topic}. "
            response = opener + TOPIC_SCRIPTS.get(topic, [""])[0]
            confidence = BOT_TOPIC_START_CONFIDENCE
        logger.info(f"Bot initiates script on topic: `{topic}`.")
        return response, topic, confidence

    if requested_topic:
        # user said `let's talk about [topic]` or
        # bot said `what do you want to talk about/would you like to switch the topic`,
        #   and user answered [topic] (not something, nothing, i don't know - in this case,
        #   it will be gone through previous if)
        topic = requested_topic
        response = TOPIC_SCRIPTS.get(topic, [""])[0]
        confidence = YES_CONTINUE_CONFIDENCE if topic in NOT_SCRIPTED_TOPICS else USER_TOPIC_START_CONFIDENCE
        logger.info(f"User initiates script on topic: `{topic}`.")
        return response, topic, confidence

    # fall back to the last topic word found in the user utterance, if any
    found_topics = find_topics_in_substring(user_uttr["text"])
    topic = found_topics[-1] if len(found_topics) else ""
    if len(topic) > 0:
        response = TOPIC_SCRIPTS.get(topic, [""])[0]
        logger.info(f"Found word in user utterance on topic: `{topic}`.")
        return response, topic, FOUND_WORD_START_CONFIDENCE

    return "", "", 0.0