Beispiel #1
0
def extract_topics(curr_uttr):
    """Extract entities as topics for news request. If no entities found, extract nounphrases.

    Args:
        curr_uttr: current human utterance dictionary

    Returns:
        list of mentioned entities/nounphrases
    """
    entities = get_entities(curr_uttr, only_named=True, with_labels=False)
    entities = [ent.lower() for ent in entities]
    # drop the wake-word "alexa" (when the utterance starts with it) and anything news-related
    entities = [
        ent
        for ent in entities
        if not (ent == "alexa" and curr_uttr["text"].lower()[:5] == "alexa") and "news" not in ent
    ]
    if len(entities) == 0:
        # fall back to nounphrases, filtered against banned unigrams and de-duplicated
        for ent in get_entities(curr_uttr, only_named=False, with_labels=False):
            if ent.lower() not in BANNED_UNIGRAMS and "news" not in ent.lower() and ent not in entities:
                entities.append(ent)
    entities = [ent for ent in entities if len(ent) > 0]
    return entities
Beispiel #2
0
def kbqa_formatter_dialog(dialog: Dict):
    # Used by: kbqa annotator
    last_uttr = dialog["human_utterances"][-1]
    annotations = last_uttr["annotations"]
    # prefer segmented sentences from sentseg, then its punctuated sentence,
    # and finally fall back to the raw utterance text
    if "sentseg" in annotations:
        sentseg = annotations["sentseg"]
        if "segments" in sentseg:
            sentences = deepcopy(sentseg["segments"])
        else:
            sentences = [deepcopy(sentseg["punct_sent"])]
    else:
        sentences = [deepcopy(last_uttr["text"])]

    entity_substr = get_entities(last_uttr, only_named=True, with_labels=False)
    nounphrases = get_entities(last_uttr, only_named=False, with_labels=False)
    # named entities take priority over nounphrases; always wrap in one batch dimension
    if entity_substr:
        entities = [entity_substr]
    elif nounphrases:
        entities = [nounphrases]
    else:
        entities = [[]]

    return [{"x_init": sentences, "entities": entities}]
Beispiel #3
0
def filter_nouns_for_conceptnet(annotated_phrase):
    """Return nounphrases from the utterance that are safe to use for opinion expression.

    Drops everything for toxic utterances; strips possessive pronouns and
    articles; removes banned nouns, nouns containing banned words, nouns
    intersecting with named entities, and single tokens that are not nouns.

    Args:
        annotated_phrase: annotated utterance dictionary

    Returns:
        list of filtered non-empty nounphrase strings
    """
    if get_toxic(annotated_phrase, probs=False):
        return []
    subjects = get_entities(annotated_phrase, only_named=False, with_labels=False)
    subjects = [re.sub(possessive_pronouns, "", noun) for noun in subjects]
    subjects = [re.sub(r"(\bthe\b|\ba\b|\ban\b)", "", noun) for noun in subjects]
    subjects = [noun for noun in subjects if noun not in BANNED_NOUNS_FOR_OPINION_EXPRESSION]
    # FIX: the original searched the whole utterance text here, making the
    # condition identical for every noun (all-or-nothing); per the constant's
    # name, the banned-words pattern is meant to be matched against each noun.
    subjects = [
        noun
        for noun in subjects
        if not re.search(BANNED_WORDS_IN_NOUNS_FOR_OPINION_EXPRESSION_COMPILED, noun)
    ]
    for ent in get_entities(annotated_phrase, only_named=True, with_labels=True):
        subjects = remove_intersections_of_entities(ent["text"], subjects)

    # keep multi-word subjects as-is; single words must be tagged PROPN or NOUN
    kept = []
    for subject in subjects:
        if len(subject.split()) == 1:
            doc = nlp(subject)
            if doc[0].pos not in [PROPN, NOUN]:
                continue
        kept.append(subject)
    return [noun for noun in kept if len(noun) > 0]
Beispiel #4
0
def get_main_info_annotations(annotated_utterance):
    """Collect intents, topics, named entities and nounphrases of an annotated utterance.

    Returns:
        tuple (intents, topics, named_entities, nounphrases), each a list.
    """
    return (
        get_intents(annotated_utterance, which="all"),
        get_topics(annotated_utterance, which="all"),
        get_entities(annotated_utterance, only_named=True, with_labels=False),
        get_entities(annotated_utterance, only_named=False, with_labels=False),
    )
Beispiel #5
0
def get_spacy_nounphrases(utt):
    """Return detected nounphrases from the utterance, skipping verb-tagged candidates."""
    candidates = get_entities(utt, only_named=False, with_labels=False)
    # keep only phrases whose POS tag is not a verb form (VB*)
    return [ph for ph in candidates if not pos_tag([ph])[0][1].startswith("VB")]
Beispiel #6
0
def extract_from_dialog(dialog):
    """Extract verb-noun phrases and their sources for the news skill.

    Uses the previous news-skill output text when the bot just asked for an
    opinion, or offered news details and the user declined; otherwise uses
    the last utterance text.
    """
    prev_news_outputs = get_skill_outputs_from_dialog(
        dialog["utterances"][-3:], "news_api_skill", activated=True)
    prev_news_output = prev_news_outputs[-1] if prev_news_outputs else {}
    no_detected = is_no(dialog["human_utterances"][-1])
    nounphrases = get_entities(
        dialog["human_utterances"][-1], only_named=False, with_labels=False)

    news_status = prev_news_output.get("news_status", "finished")
    use_prev_news = news_status == OPINION_REQUEST_STATUS or (
        news_status == OFFERED_NEWS_DETAILS_STATUS and no_detected)
    if use_prev_news:
        source_text = prev_news_outputs[-1].get("text", "nothing")
    else:
        source_text = dialog["utterances"][-1]["text"]
    verb_noun_phrases, sources = extract_verb_noun_phrases(
        source_text, only_i_do_that=False, nounphrases=nounphrases)
    return verb_noun_phrases, sources
Beispiel #7
0
def ask_if_user_thinks_that_gaming_is_unhealthy_response(vars):
    """Ask whether the user agrees that excessive gaming can be unhealthy.

    Sets confidence/can-continue flags depending on whether named entities
    were mentioned, unless the previous bot phrase already determined them.
    """
    response = (
        "It is known that people who play computer games too much can have health problems, "
        "both physical and emotional. Do you agree?")
    last_human_uttr = state_utils.get_last_human_utterance(vars)
    named_entities = get_entities(last_human_uttr, only_named=True)
    logger.info(
        f"(ask_if_user_thinks_that_gaming_is_unhealthy_response)entities: {named_entities}"
    )
    prev_bot_text = state_utils.get_last_bot_utterance(vars).get("text", "").lower()
    flags_set = False
    # if the user did not explicitly ask to chat about video games,
    # let the previous bot phrase drive confidence/continuation flags
    if not if_chat_about_particular_topic(
            last_human_uttr, compiled_pattern=VIDEO_GAME_WORDS_COMPILED_PATTERN):
        flags_set, response = common_nlg.maybe_set_confidence_and_continue_based_on_previous_bot_phrase(
            vars, prev_bot_text, response)
    if not flags_set:
        if named_entities:
            state_utils.set_confidence(
                vars, confidence=common_nlg.CONF_092_CAN_CONTINUE)
            state_utils.set_can_continue(
                vars, continue_flag=common_constants.CAN_CONTINUE_SCENARIO)
        else:
            state_utils.set_confidence(vars, confidence=common_nlg.CONF_1)
            state_utils.set_can_continue(
                vars, continue_flag=common_constants.MUST_CONTINUE)
    return response
Beispiel #8
0
def get_nounphrases_from_human_utterance(ctx: Context, actor: Actor):
    """Return all (not only named) entities of the last human utterance, without labels."""
    last_uttr = get_last_human_utterance(ctx, actor)
    return common_utils.get_entities(last_uttr, only_named=False, with_labels=False)
Beispiel #9
0
def get_named_entities(utt):
    """Return named entities from the utterance, skipping known unigrams and the wake-word."""
    found = get_entities(utt, only_named=True, with_labels=False)
    return [
        ent
        for ent in found
        if ent not in UNIGRAMS
        and not (ent == "alexa" and utt["text"].lower().startswith("alexa"))
    ]
Beispiel #10
0
def get_named_entities_from_human_utterance(ctx: Context, actor: Actor):
    """Return labeled named entities from the last human utterance.

    Each entity is a dict, e.g. {"text": "London", "type": "LOC"}.
    """
    last_uttr = get_last_human_utterance(ctx, actor)
    return common_utils.get_entities(last_uttr, only_named=True, with_labels=True)
Beispiel #11
0
def extract_possible_names(annotated_utterance, only_named, with_labels):
    """Collect entities as possible names; for non-named mode, also add spacy nounphrases."""
    entities = get_entities(
        annotated_utterance,
        only_named=only_named,
        with_labels=with_labels,
    )
    if only_named:
        return entities
    nounphrases = annotated_utterance["annotations"].get("spacy_nounphrases", [])
    if with_labels:
        # mirror the labeled-entity shape for nounphrases
        nounphrases = [{"text": np, "label": "misc"} for np in nounphrases]
    entities += nounphrases
    return entities
Beispiel #12
0
def el_formatter_dialog(dialog: Dict):
    # Used by: entity_linking annotator
    num_last_utterances = 2
    last_human_uttr = dialog["human_utterances"][-1]
    ner_output = get_entities(last_human_uttr, only_named=True, with_labels=True)
    nounphrases = get_entities(last_human_uttr, only_named=False, with_labels=False)

    # collect named-entity substrings, skipping the wake-word "alexa"
    entity_substr_list = []
    for entity in ner_output or []:
        well_formed = entity and isinstance(entity, dict) and "text" in entity
        if well_formed and entity["text"].lower() != "alexa":
            entity_substr_list.append(entity["text"])
    entity_substr_lower_list = {substr.lower() for substr in entity_substr_list}

    dialog = utils.get_last_n_turns(dialog, bot_last_turns=1)
    dialog = utils.replace_with_annotated_utterances(dialog, mode="punct_sent")
    context = [[uttr["text"] for uttr in dialog["utterances"][-num_last_utterances:]]]

    # add nounphrases that do not duplicate named entities (case-insensitively)
    if nounphrases:
        entity_substr_list += [
            np for np in nounphrases if np.lower() not in entity_substr_lower_list
        ]
    entity_substr_list = list(set(entity_substr_list))

    return [{
        "entity_substr": [entity_substr_list],
        "template": [""],
        "context": context,
    }]
Beispiel #13
0
    def add_bot_encounters(self, human_utters, bot_utters, human_utter_index):
        """Record encounters of this entity in the bot's last utterance."""
        bot_utter = bot_utters[0] if bot_utters else {}
        all_phrases = get_entities(bot_utter, only_named=False, with_labels=False)
        # keep only phrases whose noun lemma contains this entity's name
        matches = [
            phrase for phrase in all_phrases if self.name in wnl.lemmatize(phrase, "n")
        ]

        active_skill = bot_utter.get("active_skill", "pre_start")
        for entity in matches:
            self.bot_encounters.append(
                BotEntityEncounter(
                    human_utterance_index=human_utter_index,
                    full_name=entity,
                    skill_name=active_skill,
                )
            )
Beispiel #14
0
def fav_food_check(vars):
    """Check that the user mentioned food-related content and did not refuse to talk."""
    last_uttr_text = state_utils.get_last_human_utterance(vars)["text"]
    user_fav_food = get_entities(
        state_utils.get_last_human_utterance(vars), only_named=False, with_labels=False)
    # cobot_topic = "Food_Drink" in get_topics(state_utils.get_last_human_utterance(vars), which="cobot_topics")
    food_words_search = bool(re.search(FOOD_WORDS_RE, last_uttr_text))
    # all conditions are evaluated eagerly, matching the original all([...]) form
    conditions = [
        any([user_fav_food, check_conceptnet(vars), food_words_search]),
        # condition_utils.no_requests(vars),
        not bool(re.search(NO_WORDS_RE, last_uttr_text)),
        not dont_want_talk(vars),
    ]
    flag = all(conditions)
    logger.info(f"fav_food_check {flag}")
    return flag
Beispiel #15
0
def fav_food_request(ngrams, vars):
    """Check that the utterance is a favourite-food request with no other pending requests."""
    last_human_uttr = state_utils.get_last_human_utterance(vars)
    user_fav_food = get_entities(last_human_uttr, only_named=False, with_labels=False)
    food_words_search = re.search(FOOD_WORDS_RE, last_human_uttr["text"])
    flag = all([
        any([user_fav_food, check_conceptnet(vars), food_words_search]),
        condition_utils.no_requests(vars),
        not re.search(NO_WORDS_RE, last_human_uttr["text"]),
    ])
    logger.info(f"fav_food_request {flag}")
    return flag
Beispiel #16
0
def generate_acknowledgement_response(ctx: Context) -> REPLY_TYPE:
    """Generate acknowledgement for human questions.

    Returns:
        string acknowledgement (templated acknowledgement from `midas_acknowledgements.json` file,
        confidence (default ACKNOWLEDGEMENT_CONF),
        human attributes (empty),
        bot attributes (empty),
        attributes (with response parts set to acknowledgement)
    """
    dialog = ctx.misc["agent"]["dialog"]
    last_human_uttr = dialog["human_utterances"][-1]
    curr_intents = get_current_intents(last_human_uttr)
    curr_considered_intents = [
        intent for intent in curr_intents if intent in MIDAS_INTENT_ACKNOWLEDGEMENTS
    ]

    ackn_response = ""
    attr = {}
    curr_human_entities = get_entities(
        last_human_uttr, only_named=False, with_labels=False)
    contains_question = is_any_question_sentence_in_utterance(last_human_uttr)

    # we generate acknowledgement ONLY if we have some entities!
    if curr_considered_intents and len(curr_human_entities) and contains_question:
        # can generate acknowledgement
        ackn_response, attr = generate_acknowledgement(
            last_human_uttr, curr_intents, curr_considered_intents)
    elif contains_question:
        ackn_response = random.choice(MANY_INTERESTING_QUESTIONS)
        attr = {"response_parts": ["acknowledgement"]}
    elif not contains_question and "opinion" in curr_considered_intents:
        ackn_response = get_midas_intent_acknowledgement("opinion", "")

    return ackn_response, ACKNOWLEDGEMENT_CONF, {}, {}, attr
Beispiel #17
0
def generate_acknowledgement(last_human_utterances, curr_intents,
                             curr_considered_intents):
    """Build an acknowledgement string and response attributes for MIDAS intents."""
    ackn_response = ""
    needs_nounphrase = any(
        intent in curr_intents for intent in ["open_question_opinion"])
    if needs_nounphrase:
        nounphrases = get_entities(
            last_human_utterances, only_named=False, with_labels=False)
        # take the last nounphrase if present and non-empty
        last_nounphrase = nounphrases[-1] if nounphrases and nounphrases[-1] else ""
        if last_nounphrase:
            ackn_response = get_midas_intent_acknowledgement(
                curr_considered_intents[-1], last_nounphrase)
    else:
        # to reformulate question, we take only the last human sentence
        last_human_sent = get_last_human_sent(last_human_utterances)
        reformulated = reformulate_question_to_statement(last_human_sent)
        ackn_response = get_midas_intent_acknowledgement(
            curr_considered_intents[-1], reformulated)
    attr = {"response_parts": ["acknowledgement"]}
    return ackn_response, attr
Beispiel #18
0
def extract_movies_names_from_annotations(annotated_uttr,
                                          check_full_utterance=False):
    """Extract movie titles from entity_detection annotations, falling back to wiki_parser.

    Returns:
        list of titles, or None when neither annotator produced output.
    """
    movies_titles = None
    annotations = annotated_uttr["annotations"]
    if "entity_detection" in annotations:
        labeled = get_entities(annotated_uttr, only_named=False, with_labels=True)
        movies_titles = [
            ent["text"] for ent in labeled if ent.get("label", "") == "videoname"
        ]

    # for now let's remove full utterance check but add entity_linking usage!
    if not movies_titles:
        # either None or empty list
        if "wiki_parser" in annotations:
            movies_titles = []
            entities_info = annotations["wiki_parser"].get("entities_info", {})
            for ent_name, ent_dict in entities_info.items():
                type_ids = [el[0] for el in ent_dict.get("instance of", [])]
                type_ids += [el[0] for el in ent_dict.get("types_2hop", [])]
                is_film = len(set(type_ids).intersection(set(topic_types["film"]))) > 0
                if (is_film
                        and ent_dict.get("token_conf", 0.0) >= 0.5
                        and ent_dict.get("conf", 0.0) >= 0.5):
                    movies_titles.append(ent_dict.get("entity_label", ent_name).lower())

    # if check_full_utterance:
    #     movies_titles += [re.sub(r"[\.\?,!]", "", annotated_uttr["text"]).strip()]
    return movies_titles
Beispiel #19
0
def tag_based_response_selection(dialog,
                                 candidates,
                                 scores,
                                 confidences,
                                 bot_utterances,
                                 all_prev_active_skills=None):
    all_prev_active_skills = all_prev_active_skills if all_prev_active_skills is not None else []
    all_prev_active_skills = Counter(all_prev_active_skills)
    annotated_uttr = dialog["human_utterances"][-1]
    all_user_intents, all_user_topics, all_user_named_entities, all_user_nounphrases = get_main_info_annotations(
        annotated_uttr)

    _is_switch_topic_request = is_switch_topic(annotated_uttr)
    _is_force_intent = any(
        [_intent in all_user_intents for _intent in FORCE_INTENTS_IC.keys()])
    # if user utterance contains any question (REGEXP & punctuation check!)
    _is_require_action_intent = is_any_question_sentence_in_utterance({
        "text":
        annotated_uttr.get("annotations",
                           {}).get("sentseg", {}).get("punct_sent",
                                                      annotated_uttr["text"])
    })
    # if user utterance contains any question AND requires some intent by socialbot
    _is_require_action_intent = _is_require_action_intent and any([
        _intent in all_user_intents
        for _intent in REQUIRE_ACTION_INTENTS.keys()
    ])
    _force_intents_detected = [
        _intent for _intent in FORCE_INTENTS_IC.keys()
        if _intent in all_user_intents
    ]
    # list of user intents which require some action by socialbot
    _require_action_intents_detected = [
        _intent for _intent in REQUIRE_ACTION_INTENTS.keys()
        if _intent in all_user_intents
    ]
    _force_intents_skills = sum([
        FORCE_INTENTS_IC.get(_intent, [])
        for _intent in _force_intents_detected
    ], [])
    # list of intents required by the socialbot
    _required_actions = sum([
        REQUIRE_ACTION_INTENTS.get(_intent, [])
        for _intent in _require_action_intents_detected
    ], [])
    _contains_entities = len(
        get_entities(annotated_uttr, only_named=False, with_labels=False)) > 0
    _is_active_skill_can_not_continue = False

    _prev_bot_uttr = dialog["bot_utterances"][-1] if len(
        dialog["bot_utterances"]) > 0 else {}
    _prev_active_skill = dialog["bot_utterances"][-1]["active_skill"] if len(
        dialog["bot_utterances"]) > 0 else ""
    _prev_prev_active_skill = dialog["bot_utterances"][-2][
        "active_skill"] if len(dialog["bot_utterances"]) > 1 else ""
    _no_script_two_times_in_a_row = False
    if _prev_active_skill and _prev_prev_active_skill:
        if all([
                skill not in ACTIVE_SKILLS + ALMOST_ACTIVE_SKILLS
                for skill in [_prev_active_skill, _prev_prev_active_skill]
        ]):
            _no_script_two_times_in_a_row = True
    disliked_skills = get_updated_disliked_skills(
        dialog, can_not_be_disliked_skills=CAN_NOT_BE_DISLIKED_SKILLS)

    _is_dummy_linkto_available = any([
        cand_uttr["skill_name"] == "dummy_skill"
        and cand_uttr.get("type", "") == "link_to_for_response_selector"
        for cand_uttr in candidates
    ])

    categorized_hyps = {}
    categorized_prompts = {}
    for dasuffix in ["reqda", ""]:
        for actsuffix in ["active", "continued", "finished"]:
            for suffix in [
                    "same_topic_entity_no_db",
                    "same_topic_entity_db",
                    "othr_topic_entity_no_db",
                    "othr_topic_entity_db",
            ]:
                categorized_hyps[f"{actsuffix}_{suffix}_{dasuffix}"] = []
                categorized_prompts[f"{actsuffix}_{suffix}_{dasuffix}"] = []

    CASE = ""
    acknowledgement_hypothesis = {}

    for cand_id, cand_uttr in enumerate(candidates):
        if confidences[cand_id] == 0.0 and cand_uttr[
                "skill_name"] not in ACTIVE_SKILLS:
            logger.info(
                f"Dropping cand_id: {cand_id} due to toxicity/badlists")
            continue

        all_cand_intents, all_cand_topics, all_cand_named_entities, all_cand_nounphrases = get_main_info_annotations(
            cand_uttr)
        skill_name = cand_uttr["skill_name"]
        _is_dialog_abandon = get_dialog_breakdown_annotations(
            cand_uttr) and PRIORITIZE_NO_DIALOG_BREAKDOWN
        _is_just_prompt = (cand_uttr["skill_name"] == "dummy_skill" and any([
            question_type in cand_uttr.get("type", "") for question_type in
            ["normal_question", "link_to_for_response_selector"]
        ])) or cand_uttr.get("response_parts", []) == ["prompt"]
        if cand_uttr["confidence"] == 1.0:
            # for those hypotheses where developer forgot to set tag to MUST_CONTINUE
            cand_uttr["can_continue"] = MUST_CONTINUE
        _can_continue = cand_uttr.get("can_continue", CAN_NOT_CONTINUE)

        _user_wants_to_chat_about_topic = (
            if_chat_about_particular_topic(annotated_uttr)
            and "about it" not in annotated_uttr["text"].lower())
        _user_does_not_want_to_chat_about_topic = if_not_want_to_chat_about_particular_topic(
            annotated_uttr)
        _user_wants_bot_to_choose_topic = if_choose_topic(
            annotated_uttr, _prev_bot_uttr)

        if any([
                phrase.lower() in cand_uttr["text"].lower()
                for phrase in LINK_TO_PHRASES
        ]):
            # add `prompt` to response_parts if any linkto phrase in hypothesis
            cand_uttr["response_parts"] = cand_uttr.get("response_parts",
                                                        []) + ["prompt"]

        # identifies if candidate contains named entities from last human utterance
        _same_named_entities = (len(
            get_common_tokens_in_lists_of_strings(
                all_cand_named_entities, all_user_named_entities)) > 0)
        # identifies if candidate contains all (not only named) entities from last human utterance
        _same_nounphrases = len(
            get_common_tokens_in_lists_of_strings(all_cand_nounphrases,
                                                  all_user_nounphrases)) > 0
        _same_topic_entity = (_same_named_entities or _same_nounphrases
                              ) and PRIORITIZE_WITH_SAME_TOPIC_ENTITY

        _is_active_skill = (_prev_active_skill == cand_uttr["skill_name"] or
                            cand_uttr.get("can_continue", "") == MUST_CONTINUE)
        _is_active_skill = _is_active_skill and skill_name in ACTIVE_SKILLS
        _is_active_skill = _is_active_skill and (_can_continue in [
            MUST_CONTINUE, CAN_CONTINUE_SCENARIO, CAN_NOT_CONTINUE
        ] or (_can_continue == CAN_CONTINUE_PROMPT
              and all_prev_active_skills.get(skill_name, []) < 10))
        _is_active_skill = _is_active_skill and PRIORITIZE_SCRIPTED_SKILLS
        if _is_active_skill:
            # we will forcibly add prompt if current scripted skill finishes scenario,
            # and has no opportunity to continue at all.
            _is_active_skill_can_not_continue = _is_active_skill and _can_continue in [
                CAN_NOT_CONTINUE
            ]

        if _is_force_intent:
            # =====force intents, choose as best_on_topic hypotheses from skills responding this request=====

            CASE = "Force intent."
            if cand_uttr["skill_name"] in _force_intents_skills:
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )

        elif _is_switch_topic_request or _user_does_not_want_to_chat_about_topic or _user_wants_bot_to_choose_topic:
            # =====direct request by user to switch the topic of current conversation=====
            # give priority to dummy linkto hypothesis if available, else other prompts if available.
            _is_active_skill = (
                cand_uttr.get("type", "") == "link_to_for_response_selector"
                if _is_dummy_linkto_available else _is_just_prompt)
            # no priority to must_continue to skip incorrect continuation of script
            _can_continue = CAN_CONTINUE_SCENARIO if _can_continue == MUST_CONTINUE else _can_continue

            CASE = "Switch topic intent."
            if len(all_user_named_entities) > 0 or len(
                    all_user_nounphrases) > 0:
                # -----user defines new topic/entity-----
                # _same_topic_entity does not depend on hyperparameter in these case
                _same_topic_entity = _same_named_entities or _same_nounphrases

                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )
            else:
                # -----user want socialbot to define new topic/entity-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )

        elif _user_wants_to_chat_about_topic:
            # user wants to chat about particular topic

            CASE = "User wants to talk about topic."
            # in this case we do not give priority to previously active skill (but give to must continue skill!)
            # because now user wants to talk about something particular
            _is_active_skill = cand_uttr.get("can_continue",
                                             "") == MUST_CONTINUE
            # _same_topic_entity does not depend on hyperparameter in these case
            _same_topic_entity = _same_named_entities or _same_nounphrases

            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )

        elif _is_require_action_intent and PRIORITIZE_WITH_REQUIRED_ACT:
            # =====user intent requires particular action=====

            CASE = "User intent requires action. USER UTTERANCE CONTAINS QUESTION."
            _is_grounding_reqda = (skill_name == "dff_grounding_skill"
                                   and cand_uttr.get(
                                       "type", "") == "universal_response")
            _is_active_skill = cand_uttr.get(
                "can_continue",
                "") == MUST_CONTINUE  # no priority to prev active skill
            _can_continue = CAN_NOT_CONTINUE  # no priority to scripted skills

            if set(all_cand_intents).intersection(
                    set(_required_actions
                        )) or _is_grounding_reqda or _is_active_skill:
                # -----one of the can intent is in intents required by user-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=True,
                )
            else:
                # -----NO required dialog acts-----
                categorized_hyps, categorized_prompts = categorize_candidate(
                    cand_id,
                    skill_name,
                    categorized_hyps,
                    categorized_prompts,
                    _is_just_prompt,
                    _is_active_skill,
                    _can_continue,
                    _same_topic_entity,
                    _is_dialog_abandon,
                    _is_required_da=False,
                )

        else:
            # =====user intent does NOT require particular action=====

            CASE = "General case."
            categorized_hyps, categorized_prompts = categorize_candidate(
                cand_id,
                skill_name,
                categorized_hyps,
                categorized_prompts,
                _is_just_prompt,
                _is_active_skill,
                _can_continue,
                _same_topic_entity,
                _is_dialog_abandon,
                _is_required_da=False,
            )

        # a bit of rule based help

        if (len(dialog["human_utterances"]) == 1
                and cand_uttr["skill_name"] == "dff_friendship_skill"
                and greeting_spec in cand_uttr["text"]):
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        elif (cand_uttr["skill_name"] == "dff_friendship_skill"
              and (how_are_you_spec in cand_uttr["text"]
                   or what_i_can_do_spec in cand_uttr["text"])
              and len(dialog["utterances"]) < 16):
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        # elif cand_uttr["skill_name"] == 'program_y_dangerous' and cand_uttr['confidence'] == 0.98:
        #     categorized_hyps = add_to_top1_category(cand_id, categorized_hyps, _is_require_action_intent)
        elif cand_uttr[
                "skill_name"] == "small_talk_skill" and is_sensitive_situation(
                    dialog["human_utterances"][-1]):
            # let small talk to talk about sex ^_^
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)
        elif cand_uttr["confidence"] >= 1.0:
            # -------------------- SUPER CONFIDENCE CASE HERE! --------------------
            categorized_hyps = add_to_top1_category(cand_id, categorized_hyps,
                                                    _is_require_action_intent)

        if cand_uttr["skill_name"] == "dff_grounding_skill" and [
                "acknowledgement"
        ] == cand_uttr.get("response_parts", []):
            acknowledgement_hypothesis = deepcopy(cand_uttr)

    logger.info(f"Current CASE: {CASE}")
    # now compute current scores as one float value
    curr_single_scores = compute_curr_single_scores(candidates, scores,
                                                    confidences)

    # remove disliked skills from hypotheses
    if IGNORE_DISLIKED_SKILLS:
        for category in categorized_hyps:
            new_ids = []
            for cand_id in categorized_hyps[category]:
                if (candidates[cand_id]["skill_name"] in disliked_skills
                        and candidates[cand_id].get("can_continue",
                                                    CAN_NOT_CONTINUE)
                        == MUST_CONTINUE):
                    disliked_skills.remove(candidates[cand_id]["skill_name"])
                if candidates[cand_id]["skill_name"] not in disliked_skills:
                    new_ids.append(cand_id)
            categorized_hyps[category] = deepcopy(new_ids)
        for category in categorized_prompts:
            new_ids = []
            for cand_id in categorized_prompts[category]:
                if (candidates[cand_id]["skill_name"] in disliked_skills
                        and candidates[cand_id].get("can_continue",
                                                    CAN_NOT_CONTINUE)
                        == MUST_CONTINUE):
                    disliked_skills.remove(candidates[cand_id]["skill_name"])
                if candidates[cand_id]["skill_name"] not in disliked_skills:
                    new_ids.append(cand_id)
            categorized_prompts[category] = deepcopy(new_ids)

    best_cand_id = pickup_best_id(categorized_hyps, candidates,
                                  curr_single_scores, bot_utterances)
    best_candidate = candidates[best_cand_id]
    best_candidate["human_attributes"] = best_candidate.get(
        "human_attributes", {})
    # save updated disliked skills to human attributes of the best candidate
    best_candidate["human_attributes"]["disliked_skills"] = disliked_skills
    logger.info(f"Best candidate: {best_candidate}")
    n_sents_without_prompt = len(sent_tokenize(best_candidate["text"]))
    _is_best_not_script = best_candidate[
        "skill_name"] not in ACTIVE_SKILLS + ALMOST_ACTIVE_SKILLS
    no_question_by_user = "******" not in dialog["human_utterances"][-1][
        "annotations"].get("sentseg",
                           {}).get("punct_sent",
                                   dialog["human_utterances"][-1]["text"])

    # if `no` to 1st in a row linkto question, and chosen response is not from scripted skill
    _no_to_first_linkto = is_no(dialog["human_utterances"][-1]) and any([
        phrase.lower() in _prev_bot_uttr.get("text", "").lower()
        for phrase in LINK_TO_PHRASES
    ])
    # if chosen short response or question by not-scripted skill
    _is_short_or_question_by_not_script = _is_best_not_script and (
        "?" in best_candidate["text"]
        or len(best_candidate["text"].split()) < 4)
    _no_questions_for_3_steps = not any([
        is_any_question_sentence_in_utterance(uttr)
        for uttr in dialog["bot_utterances"][-3:]
    ])

    if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS:
        if (_no_script_two_times_in_a_row
                and _is_short_or_question_by_not_script
                and no_question_by_user) or (_no_to_first_linkto
                                             and _is_best_not_script):
            # if no scripted skills 2 time sin a row before, current chosen best cand is not scripted, contains `?`,
            # and user utterance does not contain "?", replace utterance with dummy!
            best_prompt_id = pickup_best_id(categorized_prompts, candidates,
                                            curr_single_scores, bot_utterances)
            best_candidate = deepcopy(candidates[best_prompt_id])
            best_cand_id = best_prompt_id

    if does_not_require_prompt(candidates, best_cand_id):
        # the candidate already contains a prompt or a question or of a length more than 200 symbols
        logger.info(
            "Best candidate contains prompt, question, request or length of > 200 symbols. Do NOT add prompt."
        )
        pass
    elif sum(categorized_prompts.values(), []):
        # best cand is 3d times in a row not scripted skill, let's append linkto

        # need to add some prompt, and have a prompt
        _add_prompt_forcibly = best_candidate[
            "skill_name"] == _prev_active_skill and _is_active_skill_can_not_continue
        _add_prompt_forcibly = _add_prompt_forcibly and not _contains_entities

        # prompts are added:
        # - in 1 out of 10 cases, if current human utterance does not contain entities,
        # and no prompt for several last bot utterances
        # - if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and current utterance is from active on prev step scripted skill and
        # it has a status can-not-continue
        # - if PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS and last 2 bot uttr are not from scripted skill,
        # and current best uttr is also from not-scripted skill
        if ((prompt_decision() and not _contains_entities
             and _no_questions_for_3_steps)
                or (_add_prompt_forcibly and PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS)
                or
            (PRIORITIZE_PROMTS_WHEN_NO_SCRIPTS
             and _no_script_two_times_in_a_row and _is_best_not_script)):
            logger.info("Decided to add a prompt to the best candidate.")
            best_prompt_id = pickup_best_id(categorized_prompts, candidates,
                                            curr_single_scores, bot_utterances)
            # as we have only one active skill, let's consider active skill as that one providing prompt
            # but we also need to reassign all the attributes
            best_prompt = candidates[best_prompt_id]
            best_candidate[
                "text"] = f'{best_candidate["text"]} {best_prompt["text"]}'
            best_candidate["attributes"] = best_candidate.get("attributes", {})
            best_candidate["attributes"]["prompt_skill"] = best_prompt

            # anyway we must combine used links
            best_candidate["human_attributes"] = best_candidate.get(
                "human_attributes", {})
            best_candidate["human_attributes"] = join_used_links_in_attributes(
                best_candidate["human_attributes"],
                best_prompt.get("human_attributes", {}))
            if len(best_candidate["human_attributes"]["used_links"]) == 0:
                best_candidate["human_attributes"].pop("used_links")

    was_ackn = if_acknowledgement_in_previous_bot_utterance(dialog)
    best_resp_cont_ackn = "acknowledgement" in best_candidate.get(
        "response_parts", [])

    if (ADD_ACKNOWLEDGMENTS_IF_POSSIBLE and acknowledgement_hypothesis
            and acknowledgement_decision(all_user_intents)
            and n_sents_without_prompt == 1 and not was_ackn
            and not best_resp_cont_ackn):
        logger.info(
            "Acknowledgement is given, Final hypothesis contains only 1 sentence, no ackn in prev bot uttr,"
            "and we decided to add an acknowledgement to the best candidate.")
        best_candidate[
            "text"] = f'{acknowledgement_hypothesis["text"]} {best_candidate["text"]}'
        best_candidate["response_parts"] = ["acknowledgement"
                                            ] + best_candidate.get(
                                                "response_parts", [])

    return best_candidate, best_cand_id, curr_single_scores
Beispiel #20
0
def get_entities_with_attitudes(annotated_uttr: dict,
                                prev_annotated_uttr: dict):
    """Infer which mentioned entities the user likes or dislikes.

    Uses entities extracted from the current and previous utterances together
    with pattern cues (favorite/love/hate questions, chat-about intents, and
    sentiment) to assign an attitude.

    Args:
        annotated_uttr: current annotated utterance dictionary
        prev_annotated_uttr: previous annotated utterance dictionary

    Returns:
        dict with keys "like" and "dislike", each a list of entity strings
    """
    attitudes = {"like": [], "dislike": []}
    curr_entities = get_entities(annotated_uttr, only_named=False, with_labels=False)
    prev_entities = get_entities(prev_annotated_uttr, only_named=False, with_labels=False)
    logger.info(
        f"Consider all curr entities: {curr_entities}, and all previous entities: {prev_entities}"
    )
    # candidate entities: the first one the user just said, the last one the bot asked about
    curr_entity = curr_entities[0] if curr_entities else ""
    prev_entity = prev_entities[-1] if prev_entities else ""
    curr_text = annotated_uttr.get("text", "")
    prev_text = prev_annotated_uttr.get("text", "")
    sentiment = get_sentiment(annotated_uttr, probs=False, default_labels=["neutral"])[0]
    first_sentence = (
        annotated_uttr.get("annotations", {}).get("sentseg", {}).get("segments", [curr_text])[0]
    )

    # a question in the user's first sentence carries no attitude signal
    if "?" not in first_sentence:
        if WHAT_FAVORITE_PATTERN.search(prev_text):
            # what is your favorite ..? - animals -> `like animals`
            attitudes["like"].append(curr_entity)
        elif WHAT_LESS_FAVORITE_PATTERN.search(prev_text):
            # what is your less favorite ..? - animals -> `dislike animals`
            attitudes["dislike"].append(curr_entity)
        elif DO_YOU_LOVE_PATTERN.search(prev_text):
            if is_no(annotated_uttr):
                # do you love .. animals? - no -> `dislike animals`
                attitudes["dislike"].append(prev_entity)
            elif is_yes(annotated_uttr):
                # do you love .. animals? - yes -> `like animals`
                attitudes["like"].append(prev_entity)
        elif DO_YOU_HATE_PATTERN.search(prev_text):
            if is_no(annotated_uttr):
                # do you hate .. animals? - no -> `like animals`
                attitudes["like"].append(prev_entity)
            elif is_yes(annotated_uttr):
                # do you hate .. animals? - yes -> `dislike animals`
                attitudes["dislike"].append(prev_entity)
        elif I_HATE_PATTERN.search(curr_text):
            # i hate .. animals -> `dislike animals`
            attitudes["dislike"].append(curr_entity)
        elif I_LOVE_PATTERN.search(curr_text) or MY_FAVORITE_PATTERN.search(curr_text):
            # i love .. animals -> `like animals`
            attitudes["like"].append(curr_entity)
        elif if_chat_about_particular_topic(
                annotated_uttr,
                prev_annotated_uttr=prev_annotated_uttr,
                key_words=[curr_entity]):
            # what do you want to chat about? - ANIMALS -> `like animals`
            attitudes["like"].append(curr_entity)
        elif if_not_want_to_chat_about_particular_topic(
                annotated_uttr, prev_annotated_uttr=prev_annotated_uttr):
            # i don't wanna talk about animals -> `dislike animals`
            attitudes["dislike"].append(curr_entity)
        elif WHAT_DO_YOU_THINK_PATTERN.search(prev_text):
            if sentiment == "negative":
                # what do you think .. animals? - negative -> `dislike animals`
                attitudes["dislike"].append(prev_entity)
            elif sentiment == "positive":
                # what do you think .. animals? - positive -> `like animals`
                attitudes["like"].append(prev_entity)

    # drop empty strings appended when no entity was available
    return {attitude: [ent for ent in ents if ent] for attitude, ents in attitudes.items()}
Beispiel #21
0
def food_fact_response(vars):
    """Reply with a fact about the food entity the user mentioned.

    Special-cases berries: if the bot's last utterance mentioned "berry" and the
    user answered with a single word that lacks "berry" (e.g. "straw"), the word
    is extended with "berry" and a targeted fact is requested via `get_fact`;
    otherwise the pre-annotated `fact_random` result is used.

    Args:
        vars: dialog state passed to `state_utils` helpers

    Returns:
        response string; falls back to `error_response(vars)` when no fact fits
        or when the user answered with a don't-know-like phrase.
    """
    acknowledgements = [
        "I like it too.", "I'm not fond of it.", "It's awesome.", "Fantastic.",
        "Loving it.", "Yummy!"
    ]
    human_utt = state_utils.get_last_human_utterance(vars)
    annotations = human_utt["annotations"]
    human_utt_text = human_utt["text"].lower()
    bot_utt_text = state_utils.get_last_bot_utterance(vars)["text"].lower()

    fact = ""
    berry_name = ""
    entity = ""
    facts = annotations.get("fact_random", [])
    if "berry" in bot_utt_text:
        # reuse the utterance fetched above instead of calling the helper again
        berry_names = get_entities(human_utt, only_named=False, with_labels=False)
        if berry_names:
            berry_name = berry_names[0]

        # single-word answer without "berry" -> treat it as a berry-name prefix
        if "berry" not in human_utt_text and len(human_utt_text.split()) == 1 and berry_name:
            berry_name += "berry"
            fact = get_fact(berry_name, f"fact about {berry_name}")
            entity = berry_name
        elif berry_name and facts:
            fact = facts[0].get("fact", "")
            entity = facts[0].get("entity_substr", "")
    elif facts:
        fact = facts[0].get("fact", "")
        entity = facts[0].get("entity_substr", "")
    try:
        state_utils.set_confidence(vars, confidence=CONF_MIDDLE)
        if re.search(DONOTKNOW_LIKE_RE, human_utt_text):
            # user does not know / does not care -> leave the scenario
            state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
            return error_response(vars)
        elif (not fact) and check_conceptnet(vars):
            # known food entity but no fact found -> ask the user instead
            # (instead of "I have never heard about it. Could you tell me more ...")
            state_utils.set_can_continue(vars,
                                         continue_flag=CAN_CONTINUE_SCENARIO)
            return "I haven't tried yet. Why do you like it?"
        elif not fact:
            state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
            return error_response(vars)
        elif fact and entity:
            state_utils.set_can_continue(vars,
                                         continue_flag=CAN_CONTINUE_SCENARIO)
            return f"{entity}. {random.choice(acknowledgements)} {fact}"
        elif fact:
            state_utils.set_can_continue(vars,
                                         continue_flag=CAN_CONTINUE_SCENARIO)
            return f"Okay. {fact}"
        else:
            state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
            return error_response(vars)
    except Exception as exc:
        logger.exception(exc)
        sentry_sdk.capture_exception(exc)
        return error_response(vars)
Beispiel #22
0
    async def send(self, payload: Dict, callback: Callable) -> None:
        """Build fallback ("dummy skill") hypotheses for the first dialog in `payload`.

        Produces up to four candidates — a random don't-know answer, a question
        matching one of the user's nounphrases, a link-to question, and a fact
        matching a nounphrase — each with a confidence and attribute dicts, and
        delivers them asynchronously via `callback`. The five result lists
        (cands, confs, human_attrs, bot_attrs, attrs) are kept index-aligned.
        On any failure the exception object itself is sent to `callback`.
        """
        try:
            st_time = time.time()
            dialog = deepcopy(payload["payload"]["dialogs"][0])
            is_sensitive_case = is_sensitive_situation(dialog["human_utterances"][-1])
            all_prev_active_skills = payload["payload"]["all_prev_active_skills"][0]

            curr_topics = get_topics(dialog["human_utterances"][-1], which="cobot_topics")
            curr_nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False)

            if len(curr_topics) == 0:
                # fall back to a generic topic when the annotator found none
                curr_topics = ["Phatic"]
            logger.info(f"Found topics: {curr_topics}")
            # clean nounphrases in place: strip removable parts, collapse spaces,
            # blank out phrases matching the ignore pattern
            for i in range(len(curr_nounphrases)):
                np = re.sub(np_remove_expr, "", curr_nounphrases[i])
                np = re.sub(rm_spaces_expr, " ", np)
                if re.search(np_ignore_expr, np):
                    curr_nounphrases[i] = ""
                else:
                    curr_nounphrases[i] = np.strip()

            curr_nounphrases = [np for np in curr_nounphrases if len(np) > 0]

            logger.info(f"Found nounphrases: {curr_nounphrases}")

            # parallel, index-aligned hypothesis lists
            cands = []
            confs = []
            human_attrs = []
            bot_attrs = []
            attrs = []

            # baseline hypothesis: a random don't-know answer
            cands += [choice(donotknow_answers)]
            confs += [0.5]
            attrs += [{"type": "dummy"}]
            human_attrs += [{}]
            bot_attrs += [{}]

            # nounphrase-matched question, only in long non-sensitive dialogs
            if len(dialog["utterances"]) > 14 and not is_sensitive_case:
                questions_same_nps = []
                for i, nphrase in enumerate(curr_nounphrases):
                    for q_id in NP_QUESTIONS.get(nphrase, []):
                        questions_same_nps += [QUESTIONS_MAP[str(q_id)]]

                if len(questions_same_nps) > 0:
                    logger.info("Found special nounphrases for questions. Return question with the same nounphrase.")
                    cands += [choice(questions_same_nps)]
                    confs += [0.5]
                    attrs += [{"type": "nounphrase_question"}]
                    human_attrs += [{}]
                    bot_attrs += [{}]

            link_to_question, human_attr = get_link_to_question(dialog, all_prev_active_skills)
            if link_to_question:
                _prev_bot_uttr = dialog["bot_utterances"][-2]["text"] if len(dialog["bot_utterances"]) > 1 else ""
                _bot_uttr = dialog["bot_utterances"][-1]["text"] if len(dialog["bot_utterances"]) > 0 else ""
                _prev_active_skill = (
                    dialog["bot_utterances"][-1]["active_skill"] if len(dialog["bot_utterances"]) > 0 else ""
                )

                # user said "no" to the FIRST link-to question in a row
                # (link-to phrase in last bot uttr but not in the one before)
                _no_to_first_linkto = any([phrase in _bot_uttr for phrase in LINK_TO_PHRASES])
                _no_to_first_linkto = _no_to_first_linkto and all(
                    [phrase not in _prev_bot_uttr for phrase in LINK_TO_PHRASES]
                )
                _no_to_first_linkto = _no_to_first_linkto and is_no(dialog["human_utterances"][-1])
                _no_to_first_linkto = _no_to_first_linkto and _prev_active_skill != "dff_friendship_skill"

                _if_switch_topic = is_switch_topic(dialog["human_utterances"][-1])
                bot_uttr_dict = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) > 0 else {}
                _if_choose_topic = if_choose_topic(dialog["human_utterances"][-1], bot_uttr_dict)
                _is_ask_me_something = ASK_ME_QUESTION_PATTERN.search(dialog["human_utterances"][-1]["text"])

                # "cant_do" intent on the previous user turn, confirmed or rejected now
                if len(dialog["human_utterances"]) > 1:
                    _was_cant_do = "cant_do" in get_intents(dialog["human_utterances"][-2]) and (
                        len(curr_nounphrases) == 0 or is_yes(dialog["human_utterances"][-1])
                    )
                    _was_cant_do_stop_it = "cant_do" in get_intents(dialog["human_utterances"][-2]) and is_no(
                        dialog["human_utterances"][-1]
                    )
                else:
                    _was_cant_do = False
                    _was_cant_do_stop_it = False

                if _was_cant_do_stop_it:
                    link_to_question = "Sorry, bye! #+#exit"
                    confs += [1.0]  # finish dialog request
                elif _no_to_first_linkto:
                    confs += [0.99]
                elif _is_ask_me_something or _if_switch_topic or _was_cant_do or _if_choose_topic:
                    confs += [1.0]  # Use it only as response selector retrieve skill output modifier
                else:
                    confs += [0.05]  # Use it only as response selector retrieve skill output modifier
                cands += [link_to_question]
                attrs += [{"type": "link_to_for_response_selector"}]
                human_attrs += [human_attr]
                bot_attrs += [{}]

            # nounphrase-matched fact, optionally followed by an opinion question
            facts_same_nps = []
            for i, nphrase in enumerate(curr_nounphrases):
                for fact_id in NP_FACTS.get(nphrase, []):
                    facts_same_nps += [
                        f"Well, now that you've mentioned {nphrase}, I've remembered this. {FACTS_MAP[str(fact_id)]}. "
                        f"{(opinion_request_question() if random.random() < ASK_QUESTION_PROB else '')}"
                    ]

            if len(facts_same_nps) > 0 and not is_sensitive_case:
                logger.info("Found special nounphrases for facts. Return fact with the same nounphrase.")
                cands += [choice(facts_same_nps)]
                confs += [0.5]
                attrs += [{"type": "nounphrase_fact"}]
                human_attrs += [{}]
                bot_attrs += [{}]

            total_time = time.time() - st_time
            logger.info(f"dummy_skill exec time: {total_time:.3f}s")
            asyncio.create_task(
                callback(task_id=payload["task_id"], response=[cands, confs, human_attrs, bot_attrs, attrs])
            )
        except Exception as e:
            logger.exception(e)
            sentry_sdk.capture_exception(e)
            # deliver the exception itself so the caller can handle the failure
            asyncio.create_task(callback(task_id=payload["task_id"], response=e))
Beispiel #23
0
def respond():
    print("response generation started")
    st_time = time.time()
    dialogs_batch = request.json["dialogs"]
    # following 3 lists have len = number of samples going to the model
    annotations_depths = []
    dial_ids = []
    input_batch = []
    # following 4 lists have len = len(dialogs_batch)
    entities = []
    lets_chat_about_flags = []
    nounphrases = []
    special_intents_flags = []
    chosen_topics = {}
    for d_id, dialog in enumerate(dialogs_batch):
        try:
            user_input_text = dialog["human_utterances"][-1]["text"]
            bot_uttr = dialog["bot_utterances"][-1] if len(
                dialog["bot_utterances"]) > 0 else {}
            switch_choose_topic = if_choose_topic(
                dialog["human_utterances"][-1], bot_uttr)
            # spacy_nounphrases
            spacy_nounphrases = get_spacy_nounphrases(
                dialog["human_utterances"][-1])
            nounphrases.append(
                re.compile(join_sentences_in_or_pattern(spacy_nounphrases), re.
                           IGNORECASE) if spacy_nounphrases else "")
            # entities
            curr_ents = get_named_entities(dialog["human_utterances"][-1])
            entities.append(
                re.compile(join_sentences_in_or_pattern(curr_ents), re.
                           IGNORECASE) if curr_ents else "")
            # intents
            lets_chat_about_flag, special_intents_flag = get_intents_flags(
                dialog["human_utterances"][-1])
            lets_chat_about_flags.append(lets_chat_about_flag)
            special_intents_flags.append(special_intents_flag)

            anntr_history_len = DEFAULT_ANNTR_HISTORY_LEN
            bot_uttrs_for_dff_check = dialog["bot_utterances"][-2:] if len(
                dialog["bot_utterances"]) > 1 else []
            dffs_flag = check_dffs(bot_uttrs_for_dff_check)
            if lets_chat_about_flag or switch_choose_topic:
                anntr_history_len = 0
            elif dffs_flag:
                anntr_history_len = DFF_ANNTR_HISTORY_LEN
            # if detected lets_chat is about topic from the file
            lets_chat_topic = get_lets_chat_topic(
                lets_chat_about_flag, dialog["human_utterances"][-1])
            # if prev skill == news_api_skill get news description and create knowledge fact
            news_api_fact = get_news_api_fact(
                bot_uttr, dialog["human_utterances"],
                not (switch_choose_topic or lets_chat_about_flag))
            # start creating data for kg service
            user_input_history = "\n".join(
                [i["text"] for i in dialog["utterances"]])

            annotators = {
                # "odqa": "answer_sentence",
                # "kbqa": "answer"
            }
            if not switch_choose_topic:
                user_input_knowledge, annotations_depth = get_knowledge_from_annotators(
                    annotators, dialog["utterances"], anntr_history_len)
            else:
                user_input_knowledge = ""
                annotations_depth = {}
            # add nounphrases and entities to the knowledge
            if user_input_knowledge:
                user_input_checked_sentence = (
                    space_join(spacy_nounphrases) + space_join(curr_ents) +
                    tokenize.sent_tokenize(user_input_knowledge)[0])
            else:
                user_input_checked_sentence = ""

            if user_input_knowledge:
                user_input = {
                    "checked_sentence": user_input_checked_sentence,
                    "knowledge": user_input_knowledge,
                    "text": user_input_text,
                    "history": user_input_history,
                }
                annotations_depths.append(annotations_depth)
                dial_ids.append(d_id)
                input_batch.append(user_input)

            retrieved_facts = get_annotations_from_dialog(
                dialog["utterances"][-anntr_history_len * 2 - 1:],
                "fact_retrieval")
            if retrieved_facts:
                for depth, fact in retrieved_facts[-TOP_N_FACTS:]:
                    user_input = {
                        "checked_sentence": fact,
                        "knowledge": fact,
                        "text": user_input_text,
                        "history": user_input_history,
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({"retrieved_fact": depth})
                    dial_ids.append(d_id)

            if any(
                [switch_choose_topic, lets_chat_topic, lets_chat_about_flag]):
                if lets_chat_topic:
                    fact = random.sample(TOPICS_FACTS[lets_chat_topic], 1)[0]
                    chosen_topics[d_id] = lets_chat_topic
                    _chosen_topic_fact = "lets_chat_cobot_da"
                elif not get_entities(dialog["human_utterances"][-1],
                                      only_named=False,
                                      with_labels=False):
                    topic = random.sample(TOPICS_FACTS.keys(), 1)[0]
                    fact = random.sample(TOPICS_FACTS[topic], 1)[0]
                    chosen_topics[d_id] = topic
                    _chosen_topic_fact = "switch_random"
                else:
                    fact = ""
                if fact:
                    user_input = {
                        "checked_sentence": fact,
                        "knowledge": fact,
                        "text": user_input_text,
                        "history": user_input_history,
                        "chosen_topic_fact": _chosen_topic_fact,
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({})
                    dial_ids.append(d_id)

            if news_api_fact:
                user_input = {
                    "checked_sentence": news_api_fact,
                    "knowledge": news_api_fact,
                    "text": user_input_text,
                    "history": user_input_history,
                    "news_api_fact": True,
                }
                input_batch.append(user_input)
                annotations_depths.append({})
                dial_ids.append(d_id)

            fact_random_facts = get_fact_random(
                dialog["utterances"][-anntr_history_len * 2 - 1:])
            if fact_random_facts:
                user_input = {
                    "checked_sentence": fact_random_facts[-1][1],
                    "knowledge": fact_random_facts[-1][1],
                    "text": user_input_text,
                    "history": user_input_history,
                    "fact_random_fact": True,
                }
                input_batch.append(user_input)
                annotations_depths.append(
                    {"fact_random": fact_random_facts[-1][0]})
                dial_ids.append(d_id)

            user_news = get_news(dialog["human_utterances"][-1], "human")
            bot_news = get_news(dialog["human_utterances"][-1], "bot")
            # all_news = get_news(dialog["human_utterances"][-1], "all")
            if user_news:
                news_desc = user_news[-1].get("decsription", "")
                if news_desc:
                    user_input = {
                        "checked_sentence": news_desc,
                        "knowledge": news_desc,
                        "text": user_input_text,
                        "history": user_input_history,
                        "news_fact": "human ",
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({})
                    dial_ids.append(d_id)
            elif bot_news:
                news_desc = bot_news[-1].get("decsription", "")
                if news_desc:
                    user_input = {
                        "checked_sentence": news_desc,
                        "knowledge": news_desc,
                        "text": user_input_text,
                        "history": user_input_history,
                        "news_fact": "bot ",
                    }
                    input_batch.append(user_input)
                    annotations_depths.append({})
                    dial_ids.append(d_id)
            # elif all_news:
            #     user_input = {
            #         'checked_sentence': all_news[-1].get("decsription", ""),
            #         'knowledge': all_news[-1].get("decsription", ""),
            #         'text': user_input_text,
            #         'history': user_input_history,
            #         'news_fact': "all ",
            #         'news_title': all_news[-1].get("title", "")
            #     }
            #     input_batch.append(user_input)
            #     annotations_depths.append({})
            #     dial_ids.append(d_id)

        except Exception as ex:
            sentry_sdk.capture_exception(ex)
            logger.exception(ex)

    try:
        raw_responses = []
        if input_batch:
            logger.info(f"skill sends to service: {input_batch}")
            resp = requests.post(KNOWLEDGE_GROUNDING_SERVICE_URL,
                                 json={"batch": input_batch},
                                 timeout=1.5)
            raw_responses = resp.json()
            logger.info(f"skill receives from service: {raw_responses}")
        else:
            responses = [[""]]
            confidences = [[0.0]]
            attributes = [[{}]]
            logger.info(
                f"Collected no hypotheses, exiting with {list(zip(responses, confidences, attributes))}"
            )
            return jsonify(list(zip(responses, confidences, attributes)))

        dial_ids = np.array(dial_ids)
        attributes = []
        confidences = []
        responses = []

        for i, dialog in enumerate(dialogs_batch):
            curr_attributes = []
            curr_confidences = []
            curr_responses = []
            for curr_i in np.where(dial_ids == i)[0]:
                attr = {
                    "knowledge_paragraph":
                    input_batch[curr_i]["knowledge"],
                    "knowledge_checked_sentence":
                    input_batch[curr_i]["checked_sentence"],
                    "can_continue":
                    CAN_NOT_CONTINUE,
                    "confidence_case":
                    "",
                }

                already_was_active, short_long_response = get_penalties(
                    dialog["bot_utterances"], raw_responses[curr_i])
                curr_nounphrase_search = nounphrases[i].search(
                    raw_responses[curr_i]) if nounphrases[i] else False
                curr_entities_search = entities[i].search(
                    raw_responses[curr_i]) if entities[i] else False
                no_penalties = False
                fact_random_penalty = 0.0

                topic = chosen_topics.get(i, "")
                chosen_topic_fact_flag = input_batch[curr_i].get(
                    "chosen_topic_fact", "")
                curr_news_fact = input_batch[curr_i].get("news_fact", "")

                add_intro = ""
                if topic and chosen_topic_fact_flag:
                    add_intro = f"Okay, Let's chat about {topic}. "
                    confidence = HIGHEST_CONFIDENCE
                    no_penalties = True
                    attr[
                        "confidence_case"] += f"topic_fact: {chosen_topic_fact_flag} "
                    attr["response_parts"] = ["prompt"]
                elif input_batch[curr_i].get("news_api_fact", ""):
                    add_intro = random.choice([
                        "Sounds like ",
                        "Seems like ",
                        "Makes sense. ",
                        # "Here's what I've heard: ", "Here's something else I've heard: ",
                        "It reminds me that",
                        "This comes to my mind: ",
                        "",
                    ])
                    no_penalties = True
                    confidence = HIGHEST_CONFIDENCE
                    attr["confidence_case"] += "news_api_fact "
                elif input_batch[curr_i].get("fact_random_fact", ""):
                    fact_random_penalty = annotations_depths[curr_i].get(
                        "fact_random", 0.0)
                    confidence = DEFAULT_CONFIDENCE
                    attr["confidence_case"] += "fact_random_fact "
                elif curr_news_fact:
                    if curr_news_fact != "all":
                        confidence = NOUNPHRASE_ENTITY_CONFIDENCE
                    else:
                        confidence = DEFAULT_CONFIDENCE
                        curr_news_title = input_batch[curr_i].get(
                            "news_title", "")
                        if curr_news_title:
                            add_intro = f"I have just read that {curr_news_title}. "
                    attr["confidence_case"] += "news_fact: " + curr_news_fact
                elif (curr_nounphrase_search
                      or curr_entities_search) and lets_chat_about_flags[i]:
                    confidence = HIGHEST_CONFIDENCE
                    attr[
                        "confidence_case"] += "nounphrase_entity_and_lets_chat_about "
                    attr["response_parts"] = ["prompt"]
                elif curr_nounphrase_search or curr_entities_search:
                    confidence = NOUNPHRASE_ENTITY_CONFIDENCE
                    attr["confidence_case"] += "nounphrase_entity "
                elif lets_chat_about_flags[i]:
                    confidence = LETS_CHAT_ABOUT_CONFIDENDENCE
                    attr["confidence_case"] += "lets_chat_about "
                    attr["response_parts"] = ["prompt"]
                else:
                    confidence = DEFAULT_CONFIDENCE
                    attr["confidence_case"] += "default "

                acronym_flag = ABBRS.search(raw_responses[curr_i])
                if acronym_flag:
                    confidence = ABBRS_CONFIDENCE
                    attr["confidence_case"] += f"acronyms: {acronym_flag} "
                    logger.debug(f"KG skill: found acronyms: {acronym_flag}")
                special_char_flag = special_char_re.search(
                    raw_responses[curr_i])
                if special_char_flag:
                    confidence = HAS_SPEC_CHAR_CONFIDENCE
                    attr["confidence_case"] += "special_char "
                    logger.debug(
                        f"KG skill: found special_char: {special_char_flag}")
                if special_intents_flags[i]:
                    confidence = 0.0
                    attr["confidence_case"] += "special_intents "
                    logger.debug("KG skill: found special_intents")
                greetings_farewells_flag = greetings_farewells_re.search(
                    raw_responses[curr_i])
                if greetings_farewells_flag:
                    confidence = 0.0
                    attr["confidence_case"] += "greetings_farewells "
                    logger.debug(
                        f"KG skill: found greetings_farewells: {greetings_farewells_flag}"
                    )

                penalties = (
                    annotations_depths[curr_i].get("retrieved_fact", 0.0) +
                    fact_random_penalty + already_was_active +
                    short_long_response if not no_penalties else 0.0)
                confidence -= penalties
                if any([
                        acronym_flag,
                        special_char_flag,
                        special_intents_flags[i],
                        greetings_farewells_flag,
                        short_long_response,
                ]):
                    logger.debug(
                        f"KG skill: found penalties in response: {raw_responses[curr_i]}, skipping it"
                    )
                    continue
                else:
                    curr_attributes.append(attr)
                    curr_confidences.append(max(0.0, confidence))
                    curr_responses.append(
                        re.sub(r'\s([?.!",;:](?:\s|$))', r"\1",
                               add_intro + raw_responses[curr_i]).replace(
                                   " ' t", "'t"))
            attributes.append(curr_attributes)
            confidences.append(curr_confidences)
            responses.append(curr_responses)

    except Exception as ex:
        sentry_sdk.capture_exception(ex)
        logger.exception(ex)
        responses = [[""]]
        confidences = [[0.0]]
        attributes = [[{}]]

    logger.info(
        f"knowledge_grounding_skill exec time: {time.time() - st_time}")
    return jsonify(list(zip(responses, confidences, attributes)))
Beispiel #24
0
def respond():
    """Flask handler: answer factoid questions for a batch of dialogs.

    For each dialog, classifies the last human utterance as factoid or not
    (factoid classifier score + entity/nounphrase presence + question
    heuristics), then queries Text QA with retrieved facts and KBQA (either
    the annotator output or a direct call) and picks the best answer.

    Returns:
        flask JSON response: list of (response, confidence, attributes)
        tuples, one per dialog.
    """
    st_time = time.time()
    # to clarify, there's just one (1) dialog returned, not multiple
    dialogs_batch = request.json["dialogs"]
    confidences = []
    responses = []
    attributes = []
    sentences_to_classify = []
    ner_outputs_to_classify = []
    is_factoid_sents = []

    for dialog in dialogs_batch:
        uttr = dialog["human_utterances"][-1]
        # probabilities of being factoid question
        last_phrase = uttr["text"]
        # crude subject extraction: everything after "about"
        if "about" in last_phrase:
            probable_subjects = last_phrase.split("about")[1:]
        else:
            probable_subjects = []
        names = get_entities(uttr, only_named=True, with_labels=True)
        names = [j["text"].lower() for j in names]
        # keep only entities/subjects we have stored facts for
        names = [j for j in names + probable_subjects if j in fact_dict.keys()]
        names = list(set(names))
        nounphrases = get_entities(uttr, only_named=False, with_labels=False)
        factoid_conf = get_factoid(uttr)
        is_factoid_cls = factoid_conf.get("is_factoid", 0.0) > 0.9
        is_factoid = is_factoid_cls and (names or nounphrases) and check_factoid(last_phrase)
        is_factoid_sents.append(is_factoid)
        ner_outputs_to_classify.append(names)

    logger.info(f"Ner outputs {ner_outputs_to_classify}")
    fact_outputs = get_random_facts(ner_outputs_to_classify)
    logger.info(f"Fact outputs {fact_outputs}")
    # NOTE(review): sentences_to_classify is never populated above, so this
    # loop is currently dead code — presumably the phrases were meant to be
    # appended in the loop above; confirm intent before restoring or removing.
    for i in range(len(sentences_to_classify)):
        if asked_about_fact(sentences_to_classify[i]):
            is_factoid_sents[i] = ASKED_ABOUT_FACT_PROB

    # collect the factoid questions (and their retrieved facts) for Text QA
    questions_batch = []
    facts_batch = []
    question_nums = []
    for n, (dialog, is_factoid, fact_output) in enumerate(
            zip(dialogs_batch, is_factoid_sents, fact_outputs)):
        curr_ann_uttr = dialog["human_utterances"][-1]
        prev_ann_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) else {}
        annotations = curr_ann_uttr["annotations"]
        tell_me_about_intent = annotations.get("intent_catcher", {}).get(
            "lets_chat_about", {}).get("detected", 0) == 1 or if_chat_about_particular_topic(
                curr_ann_uttr, prev_ann_uttr)

        if "sentrewrite" in annotations:
            text_rewritten = annotations["sentrewrite"]["modified_sents"][-1]
        else:
            text_rewritten = curr_ann_uttr["text"]
        is_question = "?" in text_rewritten
        if is_factoid and (tell_me_about_intent or is_question):
            questions_batch.append(curr_ann_uttr["text"])
            facts_batch.append(annotations.get("fact_retrieval", {}).get("facts", []))
            question_nums.append(n)

    # defaults in case Text QA is unreachable
    text_qa_response_batch = [
        {"answer": "", "answer_sentence": "", "confidence": 0.0} for _ in dialogs_batch
    ]
    try:
        resp = requests.post(
            TEXT_QA_URL,
            json={"question_raw": questions_batch, "top_facts": facts_batch},
            timeout=0.5,
        )
    except requests.RequestException as e:
        # FIX: a connection error / timeout previously crashed the whole handler
        logger.exception(e)
        resp = None
    if resp is None or resp.status_code != 200:
        logger.info("API Error: Text QA inaccessible")
    else:
        logger.info("Query against Text QA succeeded")
        text_qa_resp = resp.json()
        text_qa_response_batch = []
        cnt_fnd = 0
        for i in range(len(dialogs_batch)):
            if i in question_nums and cnt_fnd < len(text_qa_resp):
                text_qa_response_batch.append({
                    "answer": text_qa_resp[cnt_fnd][0],
                    "answer_sentence": text_qa_resp[cnt_fnd][3],
                    "confidence": text_qa_resp[cnt_fnd][1],
                })
                # FIX: cnt_fnd was never advanced, so every factoid question
                # in the batch received the first question's answer
                cnt_fnd += 1
            else:
                text_qa_response_batch.append(
                    {"answer": "", "answer_sentence": "", "confidence": 0.0}
                )
        # FIX: log only after a successful parse — resp.json() raised on
        # error responses, and re-parsing the body was wasteful
        logger.info(f"Response: {text_qa_resp}")

    kbqa_response = dict()

    for dialog, text_qa_response, is_factoid, fact_output in zip(
            dialogs_batch, text_qa_response_batch, is_factoid_sents, fact_outputs):
        attr = {}
        curr_ann_uttr = dialog["human_utterances"][-1]
        prev_ann_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) else {}
        # FIX: this loop previously reused `last_phrase` left over from the
        # first loop (always the final dialog's text); use the current dialog's
        last_phrase = curr_ann_uttr["text"]
        tell_me_about_intent = curr_ann_uttr["annotations"].get(
            "intent_catcher", {}).get("lets_chat_about", {}).get(
                "detected", 0) == 1 or if_chat_about_particular_topic(
                    curr_ann_uttr, prev_ann_uttr)

        if "sentrewrite" in curr_ann_uttr["annotations"]:
            curr_uttr_rewritten = curr_ann_uttr["annotations"]["sentrewrite"]["modified_sents"][-1]
        else:
            curr_uttr_rewritten = curr_ann_uttr["text"]
        is_question = "?" in curr_uttr_rewritten
        logger.info(
            f"is_factoid {is_factoid} tell_me_about {tell_me_about_intent} is_question {is_question}"
        )
        if is_factoid and (tell_me_about_intent or is_question):
            logger.info("Question is classified as factoid. Querying KBQA and ODQA.")
            print("Question is classified as factoid. Querying KBQA and ODQA...", flush=True)
            logger.info(
                f"Using annotators output, kbqa_response {curr_ann_uttr['annotations'].get('kbqa', [])}"
            )
            if use_annotators_output:
                kbqa_response = curr_ann_uttr["annotations"].get("kbqa", {})
                logger.info(f"Using annotators output, kbqa_response {kbqa_response}")
            else:
                # query KBQA directly; the thread pool is kept for parity with
                # the multi-system version of this code.
                # FIX: context manager shuts the executor down (was leaked)
                with concurrent.futures.ThreadPoolExecutor() as executor:
                    futures = [
                        executor.submit(getQaResponse, last_phrase, system)
                        for system in ["kbqa"]
                    ]
                    for future in concurrent.futures.as_completed(futures):
                        kbqa_response = future.result()

            response, confidence = qa_choose(last_phrase, text_qa_response, kbqa_response)
            # keep responses under 300 characters, cutting at comma boundaries
            if len(response) > 300:
                response_cut = ""
                cur_len = 0
                response_split = response.split(", ")
                for piece in response_split:
                    if cur_len + len(piece) < 300:
                        response_cut += f"{piece}, "
                        cur_len += len(piece)
                response = response_cut.rstrip(", ")

            if not response:
                response = random.choice(DONT_KNOW_ANSWER)
                confidence = FACTOID_NOTSURE_CONFIDENCE
                attr["not sure"] = True
        else:
            logger.info("Question is not classified as factoid.")
            response = ""
            confidence = 0.0
        responses.append(response)
        confidences.append(confidence)
        attributes.append(attr)
    logger.info(f"Responses {responses}")
    total_time = time.time() - st_time
    logger.info(f"factoid_qa exec time: {total_time:.3f}s")
    return jsonify(list(zip(responses, confidences, attributes)))
Beispiel #25
0
def get_named_entities_from_human_utterance(vars):
    """Return named entities (with labels) from the last human utterance.

    Each entity is a dict, e.g. {"text": "London", "type": "LOC"}.
    """
    last_human_uttr = vars["agent"]["dialog"]["human_utterances"][-1]
    return common_utils.get_entities(last_human_uttr, only_named=True, with_labels=True)
Beispiel #26
0
def get_nounphrases_from_human_utterance(vars):
    """Return plain noun phrases (no labels) from the last human utterance."""
    last_human_uttr = vars["agent"]["dialog"]["human_utterances"][-1]
    return common_utils.get_entities(last_human_uttr, only_named=False, with_labels=False)
Beispiel #27
0
def process_info(dialog, which_info="name"):
    """Handle sharing of the user's personal info (name, homeland, location).

    Detects whether the bot asked for the given info or the user volunteered
    it, extracts the value via check_entities, stores it in the human
    attributes, and builds an acknowledgement response.

    Args:
        dialog: dialog dict with "human_utterances", "bot_utterances",
            "utterances" and the "human" profile/attributes.
        which_info: one of "name", "homeland", "location".

    Returns:
        (response, confidence, human_attr, bot_attr, attr) tuple; attr
        carries the "can_continue" flag.
    """
    human_attr = {}
    bot_attr = {}
    attr = {"can_continue": CAN_NOT_CONTINUE}
    response = ""
    confidence = 0.0

    curr_uttr_dict = dialog["human_utterances"][-1]
    curr_user_uttr = curr_uttr_dict["text"].lower()
    curr_user_annot = curr_uttr_dict["annotations"]
    bot_utterance_texts = [u["text"].lower() for u in dialog["bot_utterances"]]
    try:
        prev_bot_uttr = dialog["bot_utterances"][-1]["text"].lower()
    except IndexError:
        # very beginning of the dialog: no bot utterance yet
        prev_bot_uttr = ""

    logger.info(f"Previous bot uterance: {prev_bot_uttr}")
    # True when either the bot just asked for this info or the user
    # volunteered it in the current utterance
    is_about_templates = {
        "name":
        what_is_your_name_pattern.search(prev_bot_uttr)
        or my_name_is_pattern.search(curr_user_uttr),
        "homeland":
        where_are_you_from_pattern.search(prev_bot_uttr)
        or my_origin_is_pattern.search(curr_user_uttr),
        "location":
        what_is_your_location_pattern.search(prev_bot_uttr)
        or my_location_is_pattern.search(curr_user_uttr),
    }
    # for location/homeland: use the alternative phrasing if the primary one
    # was already said earlier in the dialog
    response_phrases = {
        "name":
        RESPONSE_PHRASES["name"][0],
        "location":
        RESPONSE_PHRASES["location"][1]
        if RESPONSE_PHRASES["location"][0].lower() in bot_utterance_texts else
        RESPONSE_PHRASES["location"][0],
        "homeland":
        RESPONSE_PHRASES["homeland"][1]
        if RESPONSE_PHRASES["homeland"][0].lower() in bot_utterance_texts else
        RESPONSE_PHRASES["homeland"][0],
    }

    got_info = False
    # if user doesn't want to share his info
    if user_tells_bot_called_him_wrong(curr_uttr_dict, prev_bot_uttr,
                                       dialog["human"]["profile"]):
        logger.info(f"User says My name is not Blabla")
        response = f"My bad. What is your name again?"
        confidence = 1.0
        got_info = True
        attr["can_continue"] = MUST_CONTINUE
    elif (is_about_templates[which_info]
          or was_user_asked_to_clarify_info(prev_bot_uttr, which_info)) and (
              is_no(curr_uttr_dict) or is_secret(curr_user_uttr, which_info)):
        # user declines to share — stop the scenario politely
        response = "As you wish."
        confidence = 1.0
        attr["can_continue"] = CAN_NOT_CONTINUE
        return response, confidence, human_attr, bot_attr, attr
    elif re.search(r"is that where you live now",
                   prev_bot_uttr) and is_yes(curr_uttr_dict):
        # user confirmed that homeland and current location are the same
        logger.info(f"Found location=homeland")
        if dialog["human"]["attributes"].get("homeland", None):
            human_attr["location"] = dialog["human"]["attributes"]["homeland"]
        else:
            # NOTE(review): below check_entities is unpacked into two values
            # (found_info, named_entities_found); here the raw return value is
            # stored directly — confirm saving the whole return is intended.
            found_homeland = check_entities(
                "homeland",
                curr_user_uttr=dialog["utterances"][-3]["text"].lower(),
                curr_user_annot=dialog["utterances"][-3]["annotations"],
                prev_bot_uttr=dialog["utterances"][-4]["text"].lower(),
            )
            human_attr["location"] = found_homeland
        response = response_phrases["location"]
        confidence = 1.0
        got_info = True
        attr["can_continue"] = MUST_CONTINUE
    elif re.search(r"is that where you live now",
                   prev_bot_uttr) and is_no(curr_uttr_dict):
        logger.info(f"Found location is not homeland")
        response = f"So, where do you live now?"
        confidence = 1.0
        got_info = False
        attr["can_continue"] = MUST_CONTINUE

    if (is_about_templates[which_info] or was_user_asked_to_clarify_info(
            prev_bot_uttr, which_info)) and not got_info:
        logger.info(f"Asked for {which_info} in {prev_bot_uttr}")
        found_info, named_entities_found = check_entities(
            which_info, curr_user_uttr, curr_user_annot, prev_bot_uttr)
        logger.info(
            f"found_info, named_entities_found: {found_info}, {named_entities_found}"
        )
        if which_info == "name" and found_info is not None:
            # presumably rejects hard-to-pronounce/garbled names by returning
            # None so the bot re-asks — TODO confirm filter_unreadable_names
            found_info = filter_unreadable_names(found_info)
        if found_info is None:
            logger.info(f"found_info is None")
            if did_user_misunderstand_bot_question_about_geography(
                    curr_user_uttr, which_info, prev_bot_uttr):
                response = ASK_GEOGRAPHICAL_LOCATION_BECAUSE_USER_MISUNDERSTOOD_BOT[
                    which_info]
                confidence = 0.9
                attr["can_continue"] = CAN_CONTINUE_SCENARIO
            elif which_info in [
                    "homeland", "location"
            ] and NON_GEOGRAPHICAL_LOCATIONS_COMPILED_PATTERN.search(
                    curr_user_uttr):
                # user named something non-geographical; give up silently
                response = ""
                confidence = 0.0
                attr["can_continue"] = CAN_NOT_CONTINUE
            elif was_user_asked_to_clarify_info(prev_bot_uttr, which_info):
                # clarification was already asked once — do not loop it
                response = ""
                confidence = 0.0
                attr["can_continue"] = CAN_NOT_CONTINUE
            elif (which_info == "name" and len(curr_user_uttr.split()) == 1
                  and len(
                      get_entities(curr_uttr_dict,
                                   only_named=False,
                                   with_labels=False)) > 0):
                # single-word reply recognized as an entity but not as a name
                response = "I've never heard about this name."
                confidence = 1.0
                attr["can_continue"] = MUST_CONTINUE
            else:
                response = REPEAT_INFO_PHRASES[which_info]
                confidence = 1.0
                attr["can_continue"] = MUST_CONTINUE
        else:
            if which_info == "name":
                found_info = shorten_long_names(found_info)
                response = response_phrases[which_info] + found_info + "."
                confidence = 1.0
                attr["can_continue"] = MUST_CONTINUE
                human_attr[which_info] = found_info
            else:
                if NON_GEOGRAPHICAL_LOCATIONS_COMPILED_PATTERN.search(
                        found_info):
                    if did_user_misunderstand_bot_question_about_geography(
                            found_info, which_info, prev_bot_uttr):
                        response = ASK_GEOGRAPHICAL_LOCATION_BECAUSE_USER_MISUNDERSTOOD_BOT[
                            which_info]
                        confidence = 0.9
                        attr["can_continue"] = CAN_CONTINUE_SCENARIO
                    else:
                        response = ""
                        confidence = 0.0
                        attr["can_continue"] = CAN_NOT_CONTINUE
                else:
                    if which_info == "location":
                        response = response_phrases[which_info]
                    elif which_info == "homeland":
                        # use homeland phrasing only while location is unknown
                        if dialog["human"]["profile"].get("location",
                                                          None) is None:
                            response = response_phrases[which_info]
                        else:
                            response = response_phrases["location"]
                    human_attr[which_info] = found_info
                    if named_entities_found:
                        confidence = 1.0
                        attr["can_continue"] = MUST_CONTINUE
                    else:
                        confidence = 0.9
                        attr["can_continue"] = CAN_CONTINUE_SCENARIO
    return response, confidence, human_attr, bot_attr, attr
Beispiel #28
0
def last_human_utt_nounphrases(dialog: Dict) -> List[Dict]:
    """Wrap the last human utterance's noun phrases for the comet_conceptnet annotator."""
    last_uttr = dialog["human_utterances"][-1]
    nounphrases = get_entities(last_uttr, only_named=False, with_labels=False)
    return [{"nounphrases": [nounphrases]}]
Beispiel #29
0
def food_fact_response(vars):
    """Reply with a fact about the food item the user mentioned.

    Picks a fact from the fact_retrieval annotator (preferring entities typed
    as food/fruit/vegetable/berry), falls back to fact_random, and handles a
    special branch when the bot's previous utterance mentioned berries.
    Confidence and can_continue flags are set via state_utils side effects.

    Returns:
        str: response text; error_response(vars) on rejection paths.
    """
    human_utt = state_utils.get_last_human_utterance(vars)
    annotations = human_utt["annotations"]
    human_utt_text = human_utt["text"].lower()
    bot_utt_text = state_utils.get_last_bot_utterance(vars)["text"]
    shared_memory = state_utils.get_shared_memory(vars)
    # facts already told in this dialog, to avoid repetition
    used_facts = shared_memory.get("used_facts", [])

    fact = ""
    facts = []
    entity = ""
    berry_name = ""

    # did the bot just link into this skill with a food-talk prompt?
    linkto_check = any([linkto in bot_utt_text for linkto in link_to_skill2i_like_to_talk["dff_food_skill"]])
    black_list_check = any(list(annotations.get("badlisted_words", {}).values()))
    conceptnet_flag, food_item = check_conceptnet(vars)

    # prefer topic facts about food-typed entities; the loop keeps the
    # sentences/substr of the LAST matching entity
    entities_facts = annotations.get("fact_retrieval", {}).get("topic_facts", [])
    for entity_facts in entities_facts:
        if entity_facts["entity_type"] in ["food", "fruit", "vegetable", "berry"]:
            if entity_facts["facts"]:
                facts = entity_facts["facts"][0].get("sentences", [])
                entity = entity_facts["entity_substr"]
            else:
                facts = []

    if not facts:
        # NOTE(review): fact_random items are handled below as dicts with
        # "entity_substr"/"fact" keys while topic facts are plain sentences —
        # both shapes flow through `facts`; confirm against the annotators.
        facts = annotations.get("fact_random", [])

    if black_list_check:
        # offensive content detected — bail out
        state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
        return error_response(vars)
    elif conceptnet_flag and all(["shower" not in human_utt_text, " mela" not in human_utt_text]):
        if "berry" in bot_utt_text.lower():
            berry_names = get_entities(state_utils.get_last_human_utterance(vars), only_named=False, with_labels=False)
            if berry_names:
                berry_name = berry_names[0]

            # single-word reply without "berr": treat it as a berry-name prefix
            if all(["berr" not in human_utt_text, len(human_utt_text.split()) == 1, berry_name]):
                berry_name += "berry"
                fact = get_fact(berry_name, f"fact about {berry_name}")
                entity = berry_name
            elif berry_name:
                if facts and entity:
                    # NOTE(review): random.choice raises IndexError when every
                    # fact is already in used_facts; this line is outside the
                    # try below — confirm facts cannot be exhausted.
                    fact = random.choice([i for i in facts if i not in used_facts])
                    # facts[0]
                elif facts:
                    # fact_random shape: pick the first unused fact whose
                    # entity matches the conceptnet food item
                    for facts_item in facts:
                        if all(
                            [
                                facts_item.get("entity_substr", "xxx") in food_item,
                                facts_item.get("fact", "") not in used_facts,
                            ]
                        ):
                            fact = facts_item.get("fact", "")
                            entity = facts_item.get("entity_substr", "")
                            break
                        else:
                            fact = ""
                            entity = ""
        else:
            if all([facts, entity, entity in food_item]):
                # NOTE(review): same potential IndexError as above when all
                # facts were already used.
                fact = random.choice([i for i in facts if i not in used_facts])
                # facts[0]
            elif facts and not entity:
                for facts_item in facts:
                    if all(
                        [
                            facts_item.get("entity_substr", "xxx") in food_item,
                            facts_item.get("fact", "") not in used_facts,
                        ]
                    ):
                        fact = facts_item.get("fact", "")
                        entity = facts_item.get("entity_substr", "")
                        break
                    else:
                        fact = ""
                        entity = ""
            else:
                fact = ""
                entity = ""
        acknowledgement = random.choice(FOOD_FACT_ACKNOWLEDGEMENTS).replace("ENTITY", entity.lower())
        state_utils.save_to_shared_memory(vars, used_facts=used_facts + [fact])

        try:
            # single-word replies / favorite-food context get high confidence
            if bot_persona_fav_food_check(vars) or len(state_utils.get_last_human_utterance(vars)["text"].split()) == 1:
                state_utils.set_confidence(vars, confidence=CONF_HIGH)
            else:
                state_utils.set_confidence(vars, confidence=CONF_MIDDLE)
            if bool(re.search(DONOTKNOW_LIKE_RE, human_utt_text)):
                state_utils.set_confidence(vars, confidence=CONF_MIDDLE)
                state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
                return "Well, as for me, I am a fan of pizza despite I cannot eat as humans."
            elif any([dont_want_talk(vars), bool(re.search(NO_WORDS_RE, human_utt_text))]):
                state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
                return error_response(vars)
            elif (not fact) and conceptnet_flag:
                # food item recognized but no fact found — keep chatting
                state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
                return "Why do you like it?"
            elif not fact:
                state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
                return error_response(vars)
            elif fact and entity:
                state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
                # acknowledgement only on the first fact of the dialog
                if len(used_facts):
                    return f"{fact} Do you want me to tell you more about {entity}?"
                else:
                    response = acknowledgement + f"{fact} Do you want to hear more about {entity}?"
                    state_utils.add_acknowledgement_to_response_parts(vars)
                    return response
            elif fact:
                state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
                if len(used_facts):
                    return f"{fact} Do you want me to tell you more about {entity}?"
                else:
                    return f"Okay. {fact} I can share with you one more cool fact. Do you agree?"
            elif linkto_check:
                state_utils.set_confidence(vars, confidence=CONF_MIDDLE)
                state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
                return "Sorry. I didn't get what kind of food you have mentioned. Could you repeat it please?"
            else:
                state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
                return error_response(vars)
        except Exception as exc:
            logger.exception(exc)
            sentry_sdk.capture_exception(exc)
            return error_response(vars)
    elif linkto_check:
        state_utils.set_confidence(vars, confidence=CONF_MIDDLE)
        state_utils.set_can_continue(vars, continue_flag=CAN_CONTINUE_SCENARIO)
        return "Sorry. I didn't get what kind of food you have mentioned. Could you repeat it please?"
    else:
        state_utils.set_can_continue(vars, continue_flag=CAN_NOT_CONTINUE)
        return error_response(vars)