def get_user_initiated_category(user_utterance, current_state) -> Tuple[Optional[str], bool]:
    """
    Work out which category (if any) the user is asking for.

    Three checks are made, in this order, and the first hit wins:
      1. CategoriesTemplate matches and its "keyword" slot is a key of
         ACTIVATIONPHRASE2CATEGORYNAME (legacy path).
      2. The user has a positive navigational intent with a supplied topic, and
         an activation phrase occurs in the first element of that topic.
      3. An activation phrase occurs anywhere in the utterance.

    Returns:
        category: the name of the category being activated, or None
        posnav: True for checks 1 and 2, False for check 3 and when nothing matched
    """
    # Flags handed to contains_phrase by both scans: every normalisation option is switched off.
    no_normalisation = {
        'lowercase_text': False,
        'lowercase_phrases': False,
        'remove_punc_text': False,
        'remove_punc_phrases': False,
    }

    def find_activation(text):
        """Return the first (activation_phrase, category_name) whose phrase is in text, else None."""
        for phrase, name in ACTIVATIONPHRASE2CATEGORYNAME.items():
            if contains_phrase(text, {phrase}, **no_normalisation):
                return phrase, name
        return None

    # Legacy code; not removing in case it breaks something
    slots = CategoriesTemplate().execute(user_utterance)
    if slots is not None:
        keyword = slots["keyword"]
        if keyword in ACTIVATIONPHRASE2CATEGORYNAME:
            category_name = ACTIVATIONPHRASE2CATEGORYNAME[keyword]
            logger.primary_info(
                f'Detected categories intent for category_name={category_name} and slots={slots}.'
            )
            return category_name, True

    # An activation phrase inside the posnav slot activates with force_start
    nav_intent = getattr(current_state, 'navigational_intent', None)
    if nav_intent and nav_intent.pos_intent and nav_intent.pos_topic_is_supplied:
        hit = find_activation(nav_intent.pos_topic[0])
        if hit is not None:
            activation_phrase, category_name = hit
            logger.primary_info(
                f"Detected categories activation phrase '{activation_phrase}' in posnav slot, so categories is activating with force_start"
            )
            return category_name, True

    # An activation phrase elsewhere in the utterance activates with can_start
    hit = find_activation(user_utterance)
    if hit is not None:
        activation_phrase, category_name = hit
        logger.primary_info(
            f"Detected categories activation phrase '{activation_phrase}' in utterance (but not in a posnav slot), so categories is activating with can_start"
        )
        return category_name, False

    return None, False
def is_no(self, utterance, pred_proba):
    """
    Decide whether the utterance is a "no" answer.

    Args:
        utterance (String): user's utterance
        pred_proba (Dict): maps dialog acts to predicted probabilities; only
            the 'neg_answer' entry is read

    Returns:
        Bool: True when the utterance contains a phrase from NO or from
        NEGATE_YES; otherwise whether the 'neg_answer' probability reaches
        NO_ANSWER_THRESHOLD
    """
    # Phrase matches take precedence over the classifier probability.
    if contains_phrase(utterance, NO):
        return True
    if contains_phrase(utterance, NEGATE_YES):
        return True
    return pred_proba['neg_answer'] >= NO_ANSWER_THRESHOLD
def fits_template_no_elaboration(user_utterance, template) -> bool:
    """
    Check that the utterance matches the template without any meaningful extra content.

    @param user_utterance: the user's utterance
    @param template: an initialized template object exposing execute(utterance),
        which returns a dict of slots or None
    @return: True iff the template matches, the 'preceder' slot does not contain
        "not", and every word of the 'preceder' and 'follower' slots is in
        STOPWORDS, in OTHER_STOPWORDS, or has get_unigram_freq(word) above 2250.
    """
    slots = template.execute(user_utterance)
    if slots is None:
        # The utterance does not fit the template at all
        return False

    # Whatever surrounds the matched part; a missing slot counts as empty
    preceder = slots.get('preceder', '').strip()
    follower = slots.get('follower', '').strip()

    # A "not" in front of the match disqualifies it
    if contains_phrase(preceder, {'not'}):
        return False

    def is_common(word):
        """A word is acceptable if it is a stopword or frequent enough."""
        return (word in STOPWORDS
                or word in OTHER_STOPWORDS
                or get_unigram_freq(word) > 2250)

    # A single uncommon word anywhere around the match means the user elaborated
    return all(is_common(word) for word in preceder.split() + follower.split())
def is_yes(self, utterance, pred_proba):
    """
    Decide whether the utterance is a "yes" answer.

    Args:
        utterance (String): user's utterance
        pred_proba (Dict): maps dialog acts to predicted probabilities; only
            the 'pos_answer' entry is read

    Returns:
        Bool: True when the utterance contains a phrase from YES and none from
        NEGATE_YES; otherwise whether the 'pos_answer' probability reaches
        YES_ANSWER_THRESHOLD
    """
    # NOTE: something like "not correct" must not count as a keyword yes,
    # hence the NEGATE_YES veto before accepting the YES match.
    if contains_phrase(utterance, YES):
        if not contains_phrase(utterance, NEGATE_YES):
            return True
    return pred_proba['pos_answer'] >= YES_ANSWER_THRESHOLD
def get_paraphrases(self, background: str, entity: str, config: dict = None):
    """
    Ask the ConvPara remote module for conversational paraphrases of `background`.

    Args:
        background: The background information that is to be conversationally paraphrased
        entity: the entity to be paraphrased
        config: optional per-call overrides for the request config. Only keys
            that exist in the defaults (CONVPARA_CONFIG plus 'top_p' and
            'seed') are forwarded; other keys are ignored.

    Returns:
        paraphrases: list of ConvParaphrase objects, minus any whose text
        contains a sign-off phrase. Two other outcomes are possible:
          - self.get_default_response() when the 'convpara' experiment value is
            False or there is no conversation history;
          - the (falsy) result of self.remote_call, unchanged, when the remote
            call returns nothing.
    """
    current_state = self.state_manager.current_state
    experiments = current_state.experiments

    # Kept as an explicit `== False`: the lookup can also return the
    # EXPERIMENT_NOT_FOUND sentinel, whose truthiness is not visible here.
    if experiments.look_up_experiment_value('convpara') == False:
        return self.get_default_response()

    history = current_state.history
    user_utterance = current_state.text
    if len(history) < 1:
        logger.warning("ConvPara called with fewer than 2 history turns")
        return self.get_default_response()
    # Context sent to the model: the last history turn plus the current utterance
    history = history[-1:] + [user_utterance]

    # Build the defaults on a private copy so that per-session values (seed,
    # experiment top_p) never leak into the shared module-level CONVPARA_CONFIG.
    defaults = dict(CONVPARA_CONFIG)
    top_p = experiments.look_up_experiment_value('convpara_top_p')
    # Only a value that was actually found may override the default top_p
    # (the original test was inverted and stored the "not found" sentinel).
    if top_p != EXPERIMENT_NOT_FOUND:
        defaults['top_p'] = top_p
    # NOTE(review): hash() of a str differs between interpreter processes unless
    # PYTHONHASHSEED is fixed - confirm that a per-process seed is acceptable.
    defaults['seed'] = hash(current_state.session_id)

    overrides = {} if config is None else config
    input_data = {
        'background': background,
        'history': history,
        'entity': entity,
        # Caller-supplied values win over the defaults, key by key
        'config': {key: overrides.get(key, default) for key, default in defaults.items()},
    }

    return_dict = self.remote_call(input_data)
    if not return_dict:
        return return_dict

    paraphrases = [
        ConvParaphrase(t, p, f, tt, tp)
        for t, p, f, tt, tp in zip(
            return_dict['paraphrases'],
            return_dict['probabilities'],
            return_dict['paraphrase_ended'],
            return_dict['paraphrase_tokens'],
            return_dict['paraphrase_token_probabilities'])
    ]
    logger.primary_info(
        f"For text {background}, received paraphrases {paraphrases}")

    # Drop paraphrases that sound like the bot is ending the conversation.
    # The survivors keep the order in which the remote module returned them.
    # FIXME: heuristic checks go here
    return [
        paraphrase for paraphrase in paraphrases
        if not contains_phrase(paraphrase.text, {'bye', 'goodbye', 'nice chatting'})
    ]
def contains_offensive(
        self,
        text: str,
        log_message: str = 'text "{}" contains offensive phrase "{}"'
) -> bool:
    """
    Returns True iff text contains a phrase from self.blacklist.

    The text is lowercased, whitelisted phrases are cut out of it, and then
    several punctuation-normalised variants of the result are each checked
    against the blacklist with contains_phrase.
    """
    text = text.lower().strip()

    # Cut whitelisted phrases out before looking for offensive ones
    for whitelisted_phrase in WHITELIST_PHRASES:
        if whitelisted_phrase not in text:
            continue
        logger.debug(
            f'Removing whitelisted phrase "{whitelisted_phrase}" from text "{text}" before checking for offensive phrases'
        )
        text = text.replace(whitelisted_phrase, '').strip()

    drop_special = {ord(ch): '' for ch in SPECIAL_CHARS}
    drop_punctuation = {ord(ch): '' for ch in string.punctuation}
    punctuation_to_space = {ord(ch): ' ' for ch in string.punctuation}

    variants = {
        # Only SPECIAL_CHARS removed
        text.translate(drop_special),
        # All of string.punctuation deleted, so a blacklisted word with a
        # trailing apostrophe or similar still matches
        text.translate(drop_punctuation),
        # All of string.punctuation turned into spaces (whitespace then
        # collapsed), so a blacklisted word glued to another word by
        # punctuation is exposed as a word of its own
        ' '.join(text.translate(punctuation_to_space).split()),
        # The text as is, for blacklist entries that themselves contain
        # punctuation
        text,
    }

    return any(
        contains_phrase(variant,
                        self.blacklist,
                        log_message,
                        lowercase_text=False,
                        lowercase_phrases=False,
                        remove_punc_text=False,
                        remove_punc_phrases=False,
                        max_phrase_len=self.blacklist_max_len)
        for variant in variants)
def is_yes(self, utterance: str) -> bool:
    """Quick helper method to return whether the user said yes

    The answer is yes when the current state's dialog act has 'is_yes_answer'
    set, when the user asks "what else", or when the utterance contains one of
    a fixed set of affirmative phrases.

    :param utterance: user's utterance
    :type utterance: str
    :return: whether user said yes or not
    :rtype: bool
    """
    dialog_act = self.rg.state_manager.current_state.dialog_act
    if dialog_act['is_yes_answer']:
        logger.primary_info(
            'WIKI has dialog act predicting "is_yes_answer"')
        return True

    # Asking for more is treated as a yes
    asking_for_more = {'what else is interesting', 'what else'}
    if contains_phrase(utterance, asking_for_more):
        return True

    affirmatives = {
        "yes", "ok", "sure", 'go on', 'yeah', 'okay', 'all', 'continue',
        'yup', 'go ahead'
    }
    return contains_phrase(utterance, affirmatives)
def categorize_offense(utterance) -> str:
    """
    Label the kind of offense expressed by the utterance.

    The templates are tried first, in a fixed order. After that each entry of
    EXAMPLES_OF_OFFENSES is tried: the 'curse' entry matches when
    contains_phrase finds one of its examples in the utterance, and any entry
    (including 'curse') matches when the utterance is itself one of its examples.

    Returns:
        'criticism', 'sexual', 'inappropriate topic', a key of
        EXAMPLES_OF_OFFENSES, or 'unknown' when nothing matches.
    """
    template_labels = (
        (CriticismTemplate, 'criticism'),
        (SexualOffensesTemplate, 'sexual'),
        (InappropOffensesTemplate, 'inappropriate topic'),
    )
    for template_class, label in template_labels:
        if template_class().execute(utterance) is not None:
            return label

    for offense_type, examples in EXAMPLES_OF_OFFENSES.items():
        phrase_hit = offense_type == 'curse' and contains_phrase(utterance, examples)
        if phrase_hit or utterance in examples:
            return offense_type

    return 'unknown'
def is_no(self, utterance: str) -> bool:
    """Quick helper method to return whether the user said no.

    Checks are made in this order:
    1. An utterance asking "what else" is never a no
    2. The current state's dialog act has 'is_no_answer' set
    3. The utterance contains `no` or a variant of it

    :param utterance: user's utterance
    :type utterance: str
    :return: whether user said no or not
    :rtype: bool
    """
    # "what else" contains the negative word "else", so it is ruled out first
    asking_for_more = {'what else is interesting', 'what else'}
    if contains_phrase(utterance, asking_for_more):
        return False

    dialog_act = self.rg.state_manager.current_state.dialog_act
    if dialog_act['is_no_answer']:
        logger.primary_info(
            'WIKI has dialog act predicting "is_no_answer"')
        return True

    negatives = {
        "no", "don't", 'neither', 'else', 'nothing', 'nope', 'none', 'not',
        "don't care"
    }
    return contains_phrase(utterance, negatives)
def populate_features(self, state : State, utterance : str) -> AdditionalFeatures:
    """Build the AdditionalFeatures object for the current turn.

    The features populated are
        1. detected_phrases: phrases of opinionable entities found by the entity linker, or,
           when there are none, opinionable phrases found directly in the utterance
        2. detected_yes / detected_no: whether user said yes or no in this turn
        3. detected_like / detected_dislike, plus detected_user_gave_reason and
           detected_user_sentiment_switch
        4. detected_user_disinterest

    Features in groups 3 and 4 are only assigned when their condition fires; otherwise they
    keep whatever value AdditionalFeatures() gave them.

    :param state: the current state; only ``state.cur_sentiment`` is read here
    :type state: State
    :param utterance: the current utterance. NOTE: when no linked phrase is detected it is
        replaced by ``self.state_manager.current_state.text`` for the rest of the method
    :type utterance: str
    :return: an additional features object containing all the detected features
    :rtype: AdditionalFeatures
    """
    additional_features = AdditionalFeatures()
    # First get the detected phrases: every phrase of every opinionable entity whose name
    # is the top entity of one of the linked spans
    linked_spans = self.state_manager.current_state.entity_linker.all_linkedspans # type: ignore
    linked_wiki_entity_names = set(linked_span.top_ent.name for linked_span in linked_spans)
    detected_phrases = [phrase.text \
        for wiki_entity_name, phrases in self.opinionable_entities.items() if wiki_entity_name in linked_wiki_entity_names\
        for phrase in phrases]
    if len(detected_phrases) > 0:
        self.logger.primary_info(f'OPINION detected linked phrases {detected_phrases}') # type: ignore
    else:
        # Fallback: look for the opinionable phrases themselves in the text of the current state
        self.logger.primary_info(f'OPINION did not detect any linked phrases. Will check for non-linked phrases') # type: ignore
        utterance = self.state_manager.current_state.text
        detected_phrases = [phrase_text for phrase_text in self.opinionable_phrases if contains_phrase(utterance, set([phrase_text]))]
        if len(detected_phrases) > 0:
            self.logger.primary_info(f'OPINION detected nonlinked phrases {detected_phrases}') # type: ignore
    additional_features.detected_phrases = tuple([phrase for phrase in detected_phrases])
    # Then detect whether user said yes: the dialog act is consulted first, the phrase list second
    if self.state_manager.current_state.dialog_act['is_yes_answer']: # type: ignore
        self.logger.primary_info(f'OPINION detected user said YES through dialog act') # type: ignore
        additional_features.detected_yes = True
    elif utils.is_high_prec_yes(utterance):
        self.logger.primary_info(f'OPINION detected user said YES through bag of words') # type: ignore
        additional_features.detected_yes = True
    else:
        additional_features.detected_yes = False
    # Same two-step check for no. Yes and no are detected independently, so both can be True.
    if self.state_manager.current_state.dialog_act['is_no_answer']: # type: ignore
        self.logger.primary_info(f'OPINION detected user said NO through dialog act') # type: ignore
        additional_features.detected_no = True
    elif utils.is_high_prec_no(utterance):
        self.logger.primary_info(f'OPINION detected user said NO through bag of words') # type: ignore
        additional_features.detected_no = True
    else:
        additional_features.detected_no = False
    # Then detect whether user said like or dislike
    sentiment = self.state_manager.current_state.corenlp['sentiment'] # type: ignore
    # If the utterance starts with the word "no", rerun sentiment analysis on the part after it.
    # (str.split(' ') never returns an empty list, so the length test is always true.)
    if len(utterance.split(' ')) > 0 and utterance.split(' ')[0] == 'no':
        # NOS is a regex defined elsewhere; its first group is used as the remainder of the utterance
        no_stripped_matches = re.match(NOS, utterance)
        no_stripped_utterance = no_stripped_matches.groups()[0] if no_stripped_matches is not None else ''
        if len(no_stripped_utterance) > 0:
            self.logger.info(f'Opinion detected user saying no in the beginning, stripped it to {no_stripped_utterance} and rerun sentiment analysis')
            sentiment = Sentiment.NEUTRAL # First set sentiment to neutral
            corenlp_module = CorenlpModule(self.state_manager)
            msg = {'text': no_stripped_utterance, "annotators": ["sentiment"]}
            response = corenlp_module.execute(msg)
            # Keep the neutral fallback unless the rerun actually produced a sentiment
            if response is not None and 'sentiment' in response:
                sentiment = response['sentiment']
    self.logger.primary_info(f'Opinion detected user opinion sentiment {sentiment}') # type: ignore
    # A like is either an explicit match by utils.is_like, or a sentiment value above 2 on an
    # utterance that is not made up solely of "yes"/"no" words.
    # NOTE(review): presumably 2 is the neutral value of Sentiment - confirm against its definition
    like, like_reason = utils.is_like(utterance)
    if like or (sentiment.value > 2 and len([word for word in utterance.split(' ') if word not in ['yes', 'no']]) > 0):
        additional_features.detected_like = True
        additional_features.detected_user_gave_reason = like_reason is not None
    # Mirror image for dislike. If both branches fire, this one overwrites detected_user_gave_reason.
    dislike, dislike_reason = utils.is_not_like(utterance)
    if dislike or (sentiment.value < 2 and len([word for word in utterance.split(' ') if word not in ['yes', 'no']]) > 0):
        additional_features.detected_dislike = True
        additional_features.detected_user_gave_reason = dislike_reason is not None
    # A sentiment switch needs an explicit like/dislike match (not just the sentiment score)
    # on the opposite side of 2 from state.cur_sentiment
    if like and state.cur_sentiment < 2:
        additional_features.detected_user_sentiment_switch = True
    if dislike and state.cur_sentiment > 2:
        additional_features.detected_user_sentiment_switch = True
    # Disinterest: explicit interest wins, then explicit disinterest, then the fallback of an
    # utterance of fewer than 4 space-separated words with no like, dislike or yes detected
    if utils.is_high_prec_interest(utterance):
        additional_features.detected_user_disinterest = False
    elif utils.is_high_prec_disinterest(utterance):
        additional_features.detected_user_disinterest = True
    elif len(utterance.split(' ')) < 4 \
        and (not additional_features.detected_like and not additional_features.detected_dislike)\
        and not additional_features.detected_yes:
        additional_features.detected_user_disinterest = True
    self.logger.primary_info(f'OPINION populated additional features to be {additional_features}') # type: ignore
    return additional_features
def is_high_prec_yes(utterance: str) -> bool:
    """Return whether contains_phrase finds any phrase of YES in the utterance."""
    yes_phrases = set(YES)
    return contains_phrase(utterance, yes_phrases)
def comparison_fn_nested_spans(
        linkedspan1: LinkedSpan, linkedspan2: LinkedSpan,
        expected_type: Optional[EntityGroup]) -> Set[LinkedSpan]:
    """
    Decide which of two LinkedSpans survives when one span is nested in the other.

    Spans that are not nested are both kept. For nested spans:

    Different protection levels: the more protected LinkedSpan is kept.

    Same protection level:
        - The inner one is kept if its top_ent matches expected_type, the outer
          one's top_ent does not, and the inner top_ent_score is above
          SCORE_THRESHOLD_CHOOSE_INNER_SPAN_OF_TYPE.
        - The inner one is also kept if the outer top_ent_score is below
          SCORE_THRESHOLD_ELIMINATE_OUTER_SPAN and the inner top_ent_score is
          above SCORE_THRESHOLD_HIGHPREC.
        - Otherwise the outer one is kept.

    For nested spans the result is whatever
    take_max_score_and_return(kept, dropped) returns; that helper is defined
    elsewhere.

    Returns:
        set of LinkedSpans to keep
    """
    # Spans are compared without any lowercasing or punctuation stripping
    as_is = dict(lowercase_text=False,
                 lowercase_phrases=False,
                 remove_punc_text=False,
                 remove_punc_phrases=False)
    l1_contains_l2 = contains_phrase(linkedspan1.span, {linkedspan2.span}, '', **as_is)
    l2_contains_l1 = contains_phrase(linkedspan2.span, {linkedspan1.span}, '', **as_is)

    # Not nested in either direction: nothing to resolve
    if not (l2_contains_l1 or l1_contains_l2):
        return {linkedspan1, linkedspan2}

    same_protection = linkedspan1.protection_level == linkedspan2.protection_level
    if not same_protection:
        if linkedspan1.protection_level < linkedspan2.protection_level:
            logger.info(
                f'Removing {linkedspan1} from high prec set because it is nested with more protected {linkedspan2}'
            )
            return take_max_score_and_return(linkedspan2, linkedspan1)
        logger.info(
            f'Removing {linkedspan2} from high prec set because it is nested with more protected {linkedspan1}'
        )
        return take_max_score_and_return(linkedspan1, linkedspan2)

    # Equal protection: work out which span is the container
    if l2_contains_l1:
        outer_linkedspan, inner_linkedspan = linkedspan2, linkedspan1
    else:
        outer_linkedspan, inner_linkedspan = linkedspan1, linkedspan2

    inner_is_the_expected_one = (
        expected_type
        and expected_type.matches(inner_linkedspan.top_ent)
        and not expected_type.matches(outer_linkedspan.top_ent)
        and inner_linkedspan.top_ent_score > SCORE_THRESHOLD_CHOOSE_INNER_SPAN_OF_TYPE)
    if inner_is_the_expected_one:
        logger.info(
            f'Removing {outer_linkedspan} from high prec set because it contains {inner_linkedspan}, '
            f'the outer one is not of expected_type={expected_type}, the inner one is of expected_type, '
            f'and the inner one has score over {SCORE_THRESHOLD_CHOOSE_INNER_SPAN_OF_TYPE}'
        )
        return take_max_score_and_return(inner_linkedspan, outer_linkedspan)

    weak_outer = outer_linkedspan.top_ent_score < SCORE_THRESHOLD_ELIMINATE_OUTER_SPAN
    if weak_outer and inner_linkedspan.top_ent_score > SCORE_THRESHOLD_HIGHPREC:
        logger.info(
            f'Removing {outer_linkedspan} from high prec set because it contains {inner_linkedspan}, '
            f'the outer one has a score below {SCORE_THRESHOLD_ELIMINATE_OUTER_SPAN}, and '
            f'the inner one has a score above {SCORE_THRESHOLD_HIGHPREC}'
        )
        return take_max_score_and_return(inner_linkedspan, outer_linkedspan)

    logger.info(
        f'Removing {inner_linkedspan} from high prec set because it is nested inside {outer_linkedspan}'
    )
    return take_max_score_and_return(outer_linkedspan, inner_linkedspan)
def contains_advice(response):
    """
    Return the result of checking `response` against ADVICE_PHRASES.

    The log message template is handed to contains_phrase, which is
    responsible for any logging.
    """
    log_template = 'Eliminating GPT2ED response "{}" because it contains bad phrase "{}"'
    return contains_phrase(response, ADVICE_PHRASES, log_template)
def get_response(self, state: dict) -> ResponseGeneratorResult:
    """
    Produce a hand-written one-turn response for the current utterance, if one applies.

    The cases are tried in this order and the first that applies wins:
      1. self.talk_about_george_floyd(state, utterance) is true: a fixed
         response with the "Black Lives Matter" entity and a smooth handoff.
      2. ChattyTemplate matches: the response is looked up in one_turn_responses.
      3. Positive navigational intent whose topic is a hesitation (and the
         utterance does not contain "depends on"): ask for the topic again.
      4. Positive navigational intent with no topic (unless WIKI was the last
         active RG and the utterance contains "tell"): ask what to talk about.
      5. The user corrects their name: apologise and clear the stored name.
    If none applies, emptyResult(state) is returned.

    Args:
        state: this response generator's state dict

    Returns:
        A ResponseGeneratorResult with priority FORCE_START, or emptyResult(state).
    """
    current_state = self.state_manager.current_state
    utterance = current_state.text.lower()
    nav_intent_output = current_state.navigational_intent

    if self.talk_about_george_floyd(state, utterance):
        blm_entity = get_entity_by_wiki_name("Black Lives Matter")
        return ResponseGeneratorResult(text=RESPONSE_TO_QUESTION_ONE_GEORGE_FLOYD,
                                       priority=ResponsePriority.FORCE_START,
                                       needs_prompt=True,
                                       state=state,
                                       cur_entity=blm_entity,
                                       conditional_state={"talked_about_blm": True},
                                       smooth_handoff=SmoothHandoff.ONE_TURN_TO_WIKI_GF)

    # All three templates are run up front, before any branch is chosen
    slots = ChattyTemplate().execute(utterance)
    my_name_slots = MyNameIsNonContextualTemplate().execute(utterance)
    not_my_name_slots = MyNameIsNotTemplate().execute(utterance)

    if slots is not None:
        # Chatty phrase: use the hand-written response for it
        chatty_phrase = slots["chatty_phrase"]
        logger.primary_info('Detected chatty phrase intent with slots={}'.format(slots))
        response, needs_prompt = one_turn_responses[chatty_phrase]
        logger.primary_info('Chatty RG returned user_response={}'.format(response))

    elif nav_intent_output.pos_intent and nav_intent_output.pos_topic_is_hesitate and "depends on" not in utterance:
        # User hesitated while trying to navigate to a topic
        logger.primary_info('User has PositiveNavigationalIntent with topic=HESITATE, so asking them for topic again')
        response = "I think I missed the last part of that sentence. Can you tell me one more time what you want to talk about?"
        needs_prompt = False

    elif nav_intent_output.pos_intent and nav_intent_output.pos_topic is None and not (
            self.state_manager.last_state_active_rg == 'WIKI' and contains_phrase(utterance, {'tell'})):
        # General positive talking intent (e.g. "i want to chat").
        # If WIKI is supposed to handle the utterance and it contains "tell", the user is
        # typically asking for more info rather than failing to name a topic, so that
        # case is excluded here.
        logger.primary_info('User has PositiveNavigationalIntent with topic=None, so ONE_TURN_HACK is responding with "What would you like to talk about?"')
        response = "Ok, I'd love to talk to you! What would you like to talk about?"
        needs_prompt = False

    elif (my_name_slots and self.state_manager.last_state_active_rg
          and self.state_manager.last_state_active_rg != 'LAUNCH') or not_my_name_slots:
        # User is correcting their name. A plain "my name is ..." only counts when some RG
        # other than LAUNCH was active in the last state.
        logger.primary_info('User is attempting to correct name.')
        response = "Oops, it sounds like I got your name wrong. I'm so sorry about that! I won't make that mistake again."
        needs_prompt = True
        self.state_manager.user_attributes.name = None

    else:
        return emptyResult(state)

    # NOTE(review): conditional_state is the same object as state - confirm this is intended.
    return ResponseGeneratorResult(text=response,
                                   priority=ResponsePriority.FORCE_START,
                                   needs_prompt=needs_prompt,
                                   state=state,
                                   cur_entity=None,
                                   conditional_state=state)
def talk_about_george_floyd(self, state: dict, utterance: str) -> bool:
    """
    Decide whether the utterance should trigger the George Floyd response.

    The answer is False once state["talked_about_blm"] is set. Otherwise the
    utterance must contain one of the listed names and also one of a small set
    of conversational words.
    """
    # The topic is only raised once
    if "talked_about_blm" in state and state["talked_about_blm"]:
        return False

    name_words = ['floyd', 'floyds', "floyd's", "ahmaud", "arbery", "arberys", "breonna"]
    conversational_words = ['know', 'talk', 'tell', 'think', 'you']

    name_mentioned = contains_phrase(utterance, name_words)
    if not name_mentioned:
        return name_mentioned
    return contains_phrase(utterance, conversational_words)
def is_high_prec_no(utterance: str) -> bool:
    """Return whether contains_phrase finds any phrase of NO in the utterance."""
    no_phrases = set(NO)
    return contains_phrase(utterance, no_phrases)
def is_high_prec_neutral(utterance: str) -> bool:
    """Return whether contains_phrase finds any phrase of NEUTRAL in the utterance."""
    neutral_phrases = set(NEUTRAL)
    return contains_phrase(utterance, neutral_phrases)