예제 #1
0
def get_user_initiated_category(user_utterance,
                                current_state) -> Tuple[Optional[str], bool]:
    """
    Work out which category (if any) the user is asking for.

    Three checks are tried in order; the first that succeeds wins:
      1. CategoriesTemplate matches the utterance and its 'keyword' slot is a known activation phrase.
      2. A known activation phrase occurs in the first positive-navigation topic of current_state.
      3. A known activation phrase occurs anywhere in the utterance.

    Returns:
        category: the category being activated, or None if nothing matched
        posnav: True when check 1 or 2 matched, False for check 3 or no match
    """

    def _has_exact_phrase(text, phrase):
        # Exact matching: no lowercasing and no punctuation removal on either side.
        return contains_phrase(text, {phrase},
                               lowercase_text=False,
                               lowercase_phrases=False,
                               remove_punc_text=False,
                               remove_punc_phrases=False)

    template_slots = CategoriesTemplate().execute(user_utterance)

    # Legacy code; not removing in case it breaks something
    if template_slots is not None:
        keyword = template_slots["keyword"]
        if keyword in ACTIVATIONPHRASE2CATEGORYNAME:
            matched_category = ACTIVATIONPHRASE2CATEGORYNAME[keyword]
            logger.primary_info(
                f'Detected categories intent for category_name={matched_category} and slots={template_slots}.'
            )
            return matched_category, True

    # If any activation phrase is in the posnav slot, activate with force_start
    posnav = getattr(current_state, 'navigational_intent', None)
    if posnav and posnav.pos_intent and posnav.pos_topic_is_supplied:
        topic_text = posnav.pos_topic[0]  # str
        for phrase, matched_category in ACTIVATIONPHRASE2CATEGORYNAME.items():
            if not _has_exact_phrase(topic_text, phrase):
                continue
            logger.primary_info(
                f"Detected categories activation phrase '{phrase}' in posnav slot, so categories is activating with force_start"
            )
            return matched_category, True

    # If any activation phrase is in the user utterance, activate with can_start
    for phrase, matched_category in ACTIVATIONPHRASE2CATEGORYNAME.items():
        if not _has_exact_phrase(user_utterance, phrase):
            continue
        logger.primary_info(
            f"Detected categories activation phrase '{phrase}' in utterance (but not in a posnav slot), so categories is activating with can_start"
        )
        return matched_category, False

    return None, False
예제 #2
0
    def is_no(self, utterance, pred_proba):
        """
        Decide whether the utterance is a "no" answer.

        A phrase match against NO or NEGATE_YES wins outright; otherwise the
        classifier probability for 'neg_answer' is compared with NO_ANSWER_THRESHOLD.

        Args:
            utterance (String): user's utterance
            pred_proba (Dict): Dict where keys are dialog acts and values are the predicted probabilities

        Returns:
            Bool: whether or not the utterance is a no answer
        """
        # Guard clauses: either phrase set matching is enough on its own.
        if contains_phrase(utterance, NO):
            return True
        if contains_phrase(utterance, NEGATE_YES):
            return True

        # No phrase evidence, so fall back to the model's probability.
        neg_probability = pred_proba['neg_answer']
        return neg_probability >= NO_ANSWER_THRESHOLD
예제 #3
0
def fits_template_no_elaboration(user_utterance, template) -> bool:
    """
    Check that the utterance matches the template without adding anything substantive around it.

    @param user_utterance: the user's utterance, responding to "how are you feeling?"
    @param template: A RegexTemplate (not the class, the initialized object)
    @return: True iff the user's utterance fits the template, the 'preceder' slot does not contain "not",
        and every word of the 'preceder' and 'follower' slots is a stopword or a high-frequency word.
    """
    matched = template.execute(user_utterance)
    if matched is None:
        # The utterance does not fit the template at all.
        return False

    # Text surrounding the matched core of the template.
    before = matched.get('preceder', '').strip()
    after = matched.get('follower', '').strip()

    # A "not" before the match flips its meaning, so reject.
    if contains_phrase(before, {'not'}):
        return False

    def _is_filler(word):
        # A word is filler if it is a known stopword or simply very frequent.
        return (word in STOPWORDS or word in OTHER_STOPWORDS
                or get_unigram_freq(word) > 2250)

    # Any single "rare" word counts as elaboration.
    return all(_is_filler(word) for word in before.split() + after.split())
예제 #4
0
    def is_yes(self, utterance, pred_proba):
        """
        Decide whether the utterance is a "yes" answer.

        A YES phrase that is not accompanied by a NEGATE_YES phrase wins outright;
        otherwise the classifier probability for 'pos_answer' is compared with
        YES_ANSWER_THRESHOLD.

        Args:
            utterance (String): user's utterance
            pred_proba (Dict): Dict where keys are dialog acts and values are the predicted probabilities

        Returns:
            Bool: whether or not the utterance is a yes answer
        """
        has_yes_phrase = contains_phrase(utterance, YES)

        # NOTE: we want something like "not correct" to be negative answer
        if has_yes_phrase and not contains_phrase(utterance, NEGATE_YES):
            return True

        pos_probability = pred_proba['pos_answer']
        return pos_probability >= YES_ANSWER_THRESHOLD
예제 #5
0
    def get_paraphrases(self, background: str, entity: str, config: dict = None):
        """
        Request conversational paraphrases of `background` via self.remote_call.

        Args:
            background: The background information that is to be conversationally paraphrased
            entity: the entity to be paraphrased
            config: optional per-call overrides for the keys of CONVPARA_CONFIG
                (plus 'top_p' and 'seed'); any other keys are ignored

        Returns:
            paraphrases: List[ConvParaphrase], with paraphrases containing a
            sign-off phrase ('bye', 'goodbye', 'nice chatting') removed.
            If the 'convpara' experiment is False or there is no history, the result of
            self.get_default_response() is returned instead. If the remote call
            returns a falsy value, that value is returned unchanged.
        """
        overrides = config or {}
        current_state = self.state_manager.current_state

        # `== False` is deliberate: the lookup may return a non-bool sentinel.
        if current_state.experiments.look_up_experiment_value('convpara') == False:  # noqa: E712
            return self.get_default_response()

        history = current_state.history
        user_utterance = current_state.text
        if len(history) < 1:
            logger.warning("ConvPara called with fewer than 2 history turns")
            return self.get_default_response()
        # Context sent to the model: the last history turn followed by the user's utterance.
        history = history[-1:] + [user_utterance]

        # Work on a copy so per-session values (top_p, seed) never leak into the
        # shared module-level defaults used by other sessions.
        defaults = dict(CONVPARA_CONFIG)

        # Only override top_p when the experiment actually supplies a value.
        # (Previously the condition was inverted and stored the not-found sentinel.)
        top_p = current_state.experiments.look_up_experiment_value('convpara_top_p')
        if top_p != EXPERIMENT_NOT_FOUND:
            defaults['top_p'] = top_p

        # NOTE(review): hash() of a str is salted per process unless PYTHONHASHSEED is
        # fixed, so this seed may differ between processes for the same session_id.
        defaults['seed'] = hash(current_state.session_id)

        input_data = {
            'background': background,
            'history': history,
            'entity': entity,
            # Caller-supplied values win; defaults fill in whatever was not supplied.
            'config': {key: overrides.get(key, default) for key, default in defaults.items()},
        }

        return_dict = self.remote_call(input_data)
        if not return_dict:
            return return_dict

        paraphrases = [
            ConvParaphrase(*fields) for fields in zip(
                return_dict['paraphrases'],
                return_dict['probabilities'],
                return_dict['paraphrase_ended'],
                return_dict['paraphrase_tokens'],
                return_dict['paraphrase_token_probabilities'])
        ]
        logger.primary_info(
            f"For text {background}, received paraphrases {paraphrases}")

        # Drop paraphrases that sound like the bot is ending the conversation.
        sign_offs = {'bye', 'goodbye', 'nice chatting'}
        # FIXME: further heuristic checks go here
        return [
            paraphrase for paraphrase in paraphrases
            if not contains_phrase(paraphrase.text, sign_offs)
        ]
    def contains_offensive(
            self,
            text: str,
            log_message: str = 'text "{}" contains offensive phrase "{}"'
    ) -> bool:
        """
        Returns True iff text contains an offensive phrase.

        The text is lowercased, whitelisted phrases are cut out, and then several
        punctuation-normalised variants are each checked against self.blacklist.
        """
        cleaned = text.lower().strip()

        # Cut whitelisted phrases out of the text before any checking happens.
        for allowed in WHITELIST_PHRASES:
            if allowed not in cleaned:
                continue
            logger.debug(
                f'Removing whitelisted phrase "{allowed}" from text "{cleaned}" before checking for offensive phrases'
            )
            cleaned = cleaned.replace(allowed, '').strip()

        # Translation tables for the different punctuation treatments.
        drop_special = dict.fromkeys(map(ord, SPECIAL_CHARS), '')
        drop_punctuation = dict.fromkeys(map(ord, string.punctuation), '')
        space_punctuation = dict.fromkeys(map(ord, string.punctuation), ' ')

        variants = {
            # Special characters removed the same way the Amazon code does (leaving * and ' in).
            cleaned.translate(drop_special),
            # All punctuation deleted: catches "pissin'" via the blacklisted "pissin".
            cleaned.translate(drop_punctuation),
            # All punctuation turned into spaces: catches an offensive word glued by
            # punctuation to a harmless one, e.g. "f**k-day" or "shit's".
            ' '.join(cleaned.translate(space_punctuation).split()),
            # Untouched text: catches blacklisted forms such as "a$$", though not when
            # they sit next to non-whitespace, e.g. "I love a$$."
            cleaned,
        }

        # Normalisation is already done above, so every built-in one is switched off.
        return any(
            contains_phrase(variant,
                            self.blacklist,
                            log_message,
                            lowercase_text=False,
                            lowercase_phrases=False,
                            remove_punc_text=False,
                            remove_punc_phrases=False,
                            max_phrase_len=self.blacklist_max_len)
            for variant in variants)
예제 #7
0
    def is_yes(self, utterance: str) -> bool:
        """Quick helper method to return whether the user said yes.

        The dialog act prediction is consulted first, then a request for more
        ("what else ..."), and finally a small set of affirmative phrases.

        :param utterance: user's utterance
        :type utterance: str
        :return: whether user said yes or not
        :rtype: bool
        """
        dialog_act = self.rg.state_manager.current_state.dialog_act
        if dialog_act['is_yes_answer']:
            logger.primary_info(
                'WIKI has dialog act predicting "is_yes_answer"')
            return True

        # Asking "what else" is treated as wanting to continue.
        wants_more = {'what else is interesting', 'what else'}
        if contains_phrase(utterance, wants_more):
            return True

        affirmatives = {
            "yes", 'yeah', 'yup', "ok", 'okay', "sure", 'all',
            'go on', 'go ahead', 'continue',
        }
        return contains_phrase(utterance, affirmatives)
예제 #8
0
 def categorize_offense(utterance) -> str:
     """
     Label the kind of offense in `utterance`.

     Regex templates are tried first, in a fixed order. After that, each entry of
     EXAMPLES_OF_OFFENSES is checked: 'curse' examples match as contained phrases,
     and every type (including 'curse') also matches when the whole utterance is
     one of its examples. Returns 'unknown' when nothing matches.
     """
     # (template class, label) pairs, checked in priority order.
     template_labels = (
         (CriticismTemplate, 'criticism'),
         (SexualOffensesTemplate, 'sexual'),
         (InappropOffensesTemplate, 'inappropriate topic'),
     )
     for template_cls, label in template_labels:
         if template_cls().execute(utterance) is not None:
             return label

     for offense_type, examples in EXAMPLES_OF_OFFENSES.items():
         curse_phrase_found = (offense_type == 'curse'
                               and contains_phrase(utterance, examples))
         if curse_phrase_found or utterance in examples:
             return offense_type
     return 'unknown'
예제 #9
0
    def is_no(self, utterance: str) -> bool:
        """Quick helper method to return whether the user said no.

        A "what else ..." request is never a no. Otherwise the dialog act
        prediction is consulted, then a small set of negative phrases.

        :param utterance: user's utterance
        :type utterance: str
        :return: whether user said no or not
        :rtype: bool
        """
        # Must be checked first: 'else' is itself one of the negative phrases below.
        wants_more = {'what else is interesting', 'what else'}
        if contains_phrase(utterance, wants_more):
            return False

        dialog_act = self.rg.state_manager.current_state.dialog_act
        if dialog_act['is_no_answer']:
            logger.primary_info(
                'WIKI has dialog act predicting "is_no_answer"')
            return True

        negatives = {
            "no", 'nope', 'not', 'none', 'nothing', 'neither', 'else',
            "don't", "don't care",
        }
        return contains_phrase(utterance, negatives)
    def populate_features(self, state: State, utterance: str) -> AdditionalFeatures:
        """Populate the additional features that can be extracted for the current turn.

        The features populated are:

        1. The detected opinionable phrases (entity-linked ones first; otherwise
           phrases found directly in the utterance)
        2. Whether user said yes or no in this turn
        3. Whether user said like or dislike in this turn, whether a reason was given,
           and whether that contradicts ``state.cur_sentiment``
        4. Whether the user appears disinterested

        :param state: the current state
        :type state: State
        :param utterance: the current utterance
        :type utterance: str
        :return: an additional features object containing all the detected features
        :rtype: AdditionalFeatures
        """
        additional_features = AdditionalFeatures()

        # First get the detected phrases
        linked_spans = self.state_manager.current_state.entity_linker.all_linkedspans # type: ignore
        linked_wiki_entity_names = {linked_span.top_ent.name for linked_span in linked_spans}
        detected_phrases = [
            phrase.text
            for wiki_entity_name, phrases in self.opinionable_entities.items()
            if wiki_entity_name in linked_wiki_entity_names
            for phrase in phrases
        ]
        if detected_phrases:
            self.logger.primary_info(f'OPINION detected linked phrases {detected_phrases}') # type: ignore
        else:
            self.logger.primary_info('OPINION did not detect any linked phrases. Will check for non-linked phrases') # type: ignore
            # NOTE: from here on the utterance argument is replaced by the state's text.
            utterance = self.state_manager.current_state.text
            detected_phrases = [
                phrase_text for phrase_text in self.opinionable_phrases
                if contains_phrase(utterance, {phrase_text})
            ]
            if detected_phrases:
                self.logger.primary_info(f'OPINION detected nonlinked phrases {detected_phrases}') # type: ignore
        additional_features.detected_phrases = tuple(detected_phrases)

        # Then detect whether user said yes / no: dialog act first, bag of words second
        dialog_act = self.state_manager.current_state.dialog_act # type: ignore
        if dialog_act['is_yes_answer']:
            self.logger.primary_info('OPINION detected user said YES through dialog act') # type: ignore
            additional_features.detected_yes = True
        elif utils.is_high_prec_yes(utterance):
            self.logger.primary_info('OPINION detected user said YES through bag of words') # type: ignore
            additional_features.detected_yes = True
        else:
            additional_features.detected_yes = False
        if dialog_act['is_no_answer']:
            self.logger.primary_info('OPINION detected user said NO through dialog act') # type: ignore
            additional_features.detected_no = True
        elif utils.is_high_prec_no(utterance):
            self.logger.primary_info('OPINION detected user said NO through bag of words') # type: ignore
            additional_features.detected_no = True
        else:
            additional_features.detected_no = False

        # Then detect whether user said like or dislike
        sentiment = self.state_manager.current_state.corenlp['sentiment']  # type: ignore
        # str.split(' ') always yields at least one element, so words[0] is safe.
        words = utterance.split(' ')
        if words[0] == 'no':
            # A leading "no" would skew the sentiment of what follows, so strip it
            # and re-run sentiment analysis on the remainder.
            no_stripped_matches = re.match(NOS, utterance)
            # The captured group is None if it did not take part in the match.
            no_stripped_utterance = (no_stripped_matches.groups()[0] or '') if no_stripped_matches is not None else ''
            if no_stripped_utterance:
                self.logger.info(f'Opinion detected user saying no in the beginning, stripped it to {no_stripped_utterance} and rerun sentiment analysis')
                sentiment = Sentiment.NEUTRAL # Stays neutral if the re-run yields nothing
                corenlp_module = CorenlpModule(self.state_manager)
                msg = {'text': no_stripped_utterance, "annotators": ["sentiment"]}
                response = corenlp_module.execute(msg)
                if response is not None and 'sentiment' in response:
                    sentiment = response['sentiment']
        self.logger.primary_info(f'Opinion detected user opinion sentiment {sentiment}') # type: ignore

        # Sentiment alone only counts if the user said something besides a bare yes/no.
        has_content_words = any(word not in ('yes', 'no') for word in words)

        like, like_reason = utils.is_like(utterance)
        if like or (sentiment.value > 2 and has_content_words):
            additional_features.detected_like = True
            additional_features.detected_user_gave_reason = like_reason is not None
        dislike, dislike_reason = utils.is_not_like(utterance)
        if dislike or (sentiment.value < 2 and has_content_words):
            additional_features.detected_dislike = True
            # NOTE: overwrites the value set by the like branch when both branches fire.
            additional_features.detected_user_gave_reason = dislike_reason is not None
        if like and state.cur_sentiment < 2:
            additional_features.detected_user_sentiment_switch = True
        if dislike and state.cur_sentiment > 2:
            additional_features.detected_user_sentiment_switch = True

        if utils.is_high_prec_interest(utterance):
            additional_features.detected_user_disinterest = False
        elif utils.is_high_prec_disinterest(utterance):
            additional_features.detected_user_disinterest = True
        elif len(words) < 4 \
                and (not additional_features.detected_like and not additional_features.detected_dislike)\
                and not additional_features.detected_yes:
            # A very short reply with no opinion and no yes is read as disinterest.
            additional_features.detected_user_disinterest = True
        self.logger.primary_info(f'OPINION populated additional features to be {additional_features}') # type: ignore
        return additional_features
예제 #11
0
def is_high_prec_yes(utterance: str) -> bool:
    """Return the result of checking `utterance` against the phrases in YES."""
    yes_phrases = set(YES)
    return contains_phrase(utterance, yes_phrases)
예제 #12
0
def comparison_fn_nested_spans(
        linkedspan1: LinkedSpan, linkedspan2: LinkedSpan,
        expected_type: Optional[EntityGroup]) -> Set[LinkedSpan]:
    """
    Decide which of two LinkedSpans survive when one span is nested inside the other.

    Not nested: both are kept.
    Nested, different protection levels: the more protected one is kept.
    Nested, same protection level:
        - The inner one is kept if its top_ent matches expected_type, the outer one's does not,
            and the inner score is above SCORE_THRESHOLD_CHOOSE_INNER_SPAN_OF_TYPE.
        - The inner one is kept if the outer score is below SCORE_THRESHOLD_ELIMINATE_OUTER_SPAN and the
            inner score is above SCORE_THRESHOLD_HIGHPREC.
        - Otherwise the outer one is kept.
    Whenever one span is dropped, the result is produced by take_max_score_and_return(kept, dropped).

    Returns:
        set of LinkedSpans to keep
    """

    def _span_contains(container, contained):
        # Exact containment: no lowercasing, no punctuation removal, empty log message.
        return contains_phrase(container, {contained},
                               '',
                               lowercase_text=False,
                               lowercase_phrases=False,
                               remove_punc_text=False,
                               remove_punc_phrases=False)

    first_holds_second = _span_contains(linkedspan1.span, linkedspan2.span)
    second_holds_first = _span_contains(linkedspan2.span, linkedspan1.span)

    # Neither span contains the other: nothing to resolve.
    if not (second_holds_first or first_holds_second):
        return {linkedspan1, linkedspan2}

    if linkedspan1.protection_level == linkedspan2.protection_level:
        if second_holds_first:
            outer, inner = linkedspan2, linkedspan1
        else:
            outer, inner = linkedspan1, linkedspan2

        # Rule 1: only the inner span is of the type we were expecting.
        only_inner_has_expected_type = (
            expected_type
            and expected_type.matches(inner.top_ent)
            and not expected_type.matches(outer.top_ent)
            and inner.top_ent_score > SCORE_THRESHOLD_CHOOSE_INNER_SPAN_OF_TYPE)
        if only_inner_has_expected_type:
            logger.info(
                f'Removing {outer} from high prec set because it contains {inner}, '
                f'the outer one is not of expected_type={expected_type}, the inner one is of expected_type, '
                f'and the inner one has score over {SCORE_THRESHOLD_CHOOSE_INNER_SPAN_OF_TYPE}'
            )
            return take_max_score_and_return(inner, outer)

        # Rule 2: the outer span is weak while the inner span is strong.
        if (outer.top_ent_score < SCORE_THRESHOLD_ELIMINATE_OUTER_SPAN
                and inner.top_ent_score > SCORE_THRESHOLD_HIGHPREC):
            logger.info(
                f'Removing {outer} from high prec set because it contains {inner}, '
                f'the outer one has a score below {SCORE_THRESHOLD_ELIMINATE_OUTER_SPAN}, and '
                f'the inner one has a score above {SCORE_THRESHOLD_HIGHPREC}'
            )
            return take_max_score_and_return(inner, outer)

        # Default: the larger span wins.
        logger.info(
            f'Removing {inner} from high prec set because it is nested inside {outer}'
        )
        return take_max_score_and_return(outer, inner)

    # Different protection levels: keep whichever is more protected.
    if linkedspan1.protection_level < linkedspan2.protection_level:
        kept, dropped = linkedspan2, linkedspan1
    else:
        kept, dropped = linkedspan1, linkedspan2
    logger.info(
        f'Removing {dropped} from high prec set because it is nested with more protected {kept}'
    )
    return take_max_score_and_return(kept, dropped)
예제 #13
0
def contains_advice(response):
    """Check `response` against ADVICE_PHRASES, passing a log template for any hit."""
    log_template = 'Eliminating GPT2ED response "{}" because it contains bad phrase "{}"'
    return contains_phrase(response, ADVICE_PHRASES, log_template)
    def get_response(self, state: dict) -> ResponseGeneratorResult:
        """Produce a one-turn, hand-written response for a handful of special cases.

        Cases are checked in this order; the first that applies wins:

        1. The utterance passes ``talk_about_george_floyd``: respond and hand off.
        2. The utterance matches ``ChattyTemplate``: look the reply up in ``one_turn_responses``.
        3. Positive navigational intent whose topic is a hesitation: ask for the topic again.
        4. Positive navigational intent with no topic: ask what the user wants to talk about.
        5. The user is correcting their name: apologise and clear the stored name.

        :param state: this response generator's state; passed through unchanged
        :return: a FORCE_START result for cases 1-5, otherwise ``emptyResult(state)``
        """
        utterance = self.state_manager.current_state.text.lower()
        nav_intent_output = self.state_manager.current_state.navigational_intent

        if self.talk_about_george_floyd(state, utterance):
            blm_entity = get_entity_by_wiki_name("Black Lives Matter")
            return ResponseGeneratorResult(text=RESPONSE_TO_QUESTION_ONE_GEORGE_FLOYD,
                                           priority=ResponsePriority.FORCE_START,
                                           needs_prompt=True, state=state,
                                           cur_entity=blm_entity, conditional_state={"talked_about_blm": True},
                                           smooth_handoff=SmoothHandoff.ONE_TURN_TO_WIKI_GF)

        # Run all templates up front; the branches below use their results.
        slots = ChattyTemplate().execute(utterance)
        my_name_slots = MyNameIsNonContextualTemplate().execute(utterance)
        not_my_name_slots = MyNameIsNotTemplate().execute(utterance)

        # Check for chatty phrases in utterance
        if slots is not None:
            chatty_phrase = slots["chatty_phrase"]
            logger.primary_info('Detected chatty phrase intent with slots={}'.format(slots))

            # Get response from dictionary of hand-written responses
            response, needs_prompt = one_turn_responses[chatty_phrase]
            logger.primary_info('Chatty RG returned user_response={}'.format(response))

        # Check for user hesitating while trying to navigate to a topic
        elif nav_intent_output.pos_intent and nav_intent_output.pos_topic_is_hesitate and "depends on" not in utterance:
            logger.primary_info('User has PositiveNavigationalIntent with topic=HESITATE, so asking them for topic again')
            response, needs_prompt = "I think I missed the last part of that sentence. Can you tell me one more time what you want to talk about?", False

        # Check for user giving general positive talking intent (e.g. "i want to chat")
        # If WIKI is supposed to handle the utterance and it contains tell, it typically means user is asking for more info (and hence doesn't really specify topic)
        elif nav_intent_output.pos_intent and nav_intent_output.pos_topic is None and not (self.state_manager.last_state_active_rg == 'WIKI' and contains_phrase(utterance, {'tell'})):
            logger.primary_info('User has PositiveNavigationalIntent with topic=None, so ONE_TURN_HACK is responding with "What would you like to talk about?"')
            response, needs_prompt = "Ok, I'd love to talk to you! What would you like to talk about?", False

        # Check for user correcting their name. "my name is X" only counts as a
        # correction when some RG other than LAUNCH handled the previous turn.
        elif (my_name_slots and self.state_manager.last_state_active_rg and self.state_manager.last_state_active_rg != 'LAUNCH') or not_my_name_slots:
            logger.primary_info('User is attempting to correct name.')
            response = "Oops, it sounds like I got your name wrong. I'm so sorry about that! I won't make that mistake again."
            needs_prompt = True
            # Forget the stored name.
            self.state_manager.user_attributes.name = None

        # Otherwise return empty
        else:
            return emptyResult(state)

        # Every handled case takes over the turn.
        return ResponseGeneratorResult(text=response, priority=ResponsePriority.FORCE_START,
                                       needs_prompt=needs_prompt, state=state,
                                       cur_entity=None, conditional_state=state)
 def talk_about_george_floyd(self, state: dict, utterance: str) -> bool:
     """
     Decide whether to respond about George Floyd / Black Lives Matter.

     Never fires once state["talked_about_blm"] is truthy. Otherwise the utterance
     must contain both one of the name terms and one of the engagement terms.
     """
     already_discussed = "talked_about_blm" in state and state["talked_about_blm"]
     if already_discussed:
         return False

     name_terms = ['floyd', 'floyds', "floyd's", "ahmaud", "arbery", "arberys", "breonna"]
     engagement_terms = ['know', 'talk', 'tell', 'think', 'you']

     mentions_name = contains_phrase(utterance, name_terms)
     return mentions_name and contains_phrase(utterance, engagement_terms)
예제 #16
0
def is_high_prec_no(utterance: str) -> bool:
    """Return the result of checking `utterance` against the phrases in NO."""
    no_phrases = set(NO)
    return contains_phrase(utterance, no_phrases)
예제 #17
0
def is_high_prec_neutral(utterance: str) -> bool:
    """Return the result of checking `utterance` against the phrases in NEUTRAL."""
    neutral_phrases = set(NEUTRAL)
    return contains_phrase(utterance, neutral_phrases)