def get_response(self, user_id='', text='', context=None, article='', **kwargs):
    """Generate a dual-encoder (human) retrieval response for one user turn.

    Parameters
    ----------
    user_id : str
        Identifier of the user; used only for logging.
    text : str
        Raw user utterance; stripped, lowercased, and tokenized before use.
    context : mutable sequence or None
        Running conversation context. If ``None``, a fresh empty list is
        created (previously a ``None`` default crashed on ``.append``).
    article : str
        Unused here; kept for interface compatibility with sibling models.
    **kwargs
        Ignored; accepted for interface compatibility.

    Returns
    -------
    tuple
        ``(response, context)`` — the retrieved response formatted for the
        user, and the context with both the user turn and the response
        appended.
    """
    if context is None:
        # BUG FIX: the default of None previously raised AttributeError on
        # context.append(); start a fresh conversation context instead.
        context = []
    logging.info('--------------------------------')
    logging.info('Generating DE (human) response for user %s.' % user_id)
    text = utils.tokenize_utterance(text.strip().lower())
    context.append(text)
    logging.info('Using context: %s' % ' '.join(list(context)))
    response_set_str = self.cached_retrieved_data['r']
    response_set_embs = self.cached_retrieved_data['r_embs']
    # Retrieve the single best candidate (k=1) against the full context,
    # with turns joined by the </s> separator the model expects.
    cached_retrieved_data = self.model.retrieve(
        context_set=[' </s> '.join(list(context))],
        response_set=response_set_str,
        response_embs=response_set_embs,
        k=1,
        batch_size=1,
        verbose=False)
    response = cached_retrieved_data['r_retrieved'][0][0]
    # remove all tags to avoid having <unk>
    response = self._format_to_user(response)
    # add appropriate tags to the response in the context
    context.append(response)
    logging.info('Response: %s' % response)
    return response, context
def stylistic_features():
    """Build ``(feature_vectors, labels)`` from the global speaker-pair corpus.

    For each (x, y) speaker pair, the feature vector concatenates x's and
    y's LIWC marker counts, followed by each side's average tokenized reply
    length per conversation turn. The label is ``high`` when x is a justice
    and y is not, ``low`` in the opposite case; mixed/ambiguous pairs are
    dropped.

    Relies on module-level globals: ``speaker_pairs``, ``all_utterances``,
    ``get_replies``, ``utils``, and the label constants
    ``high``/``low``/``error``.

    Returns
    -------
    tuple(list, list)
        Parallel lists of feature vectors and their labels.
    """
    data = []
    data_target = []
    # FIX: dict.iteritems() is Python-2-only and raises AttributeError on
    # Python 3; .items() behaves identically on both interpreter versions.
    for pair, conversation in speaker_pairs.items():
        this_vector = []
        replies_x = get_replies(conversation, "x")
        replies_y = get_replies(conversation, "y")
        # Average reply length per conversation turn; true (float) division
        # is guaranteed by the ``from __future__ import division`` at the
        # top of the file.
        avg_x = len(utils.tokenize_utterance(replies_x)) / len(conversation)
        avg_y = len(utils.tokenize_utterance(replies_y)) / len(conversation)
        x_marker_count = utils.get_liwc_counts_from_utterance(replies_x)
        y_marker_count = utils.get_liwc_counts_from_utterance(replies_y)
        this_vector = this_vector + list(x_marker_count) + list(y_marker_count)
        this_vector.append(avg_x)
        this_vector.append(avg_y)
        # The first exchange of the conversation identifies the two speakers.
        x = all_utterances[conversation[0][0]]
        y = all_utterances[conversation[0][1]]
        if x["is_justice"] and not y["is_justice"]:
            label = high
        elif not x["is_justice"] and y["is_justice"]:
            label = low
        else:
            # Both or neither are justices: pair carries no usable label.
            label = error
        if label != error:
            data.append(this_vector)
            data_target.append(label)
    return (data, data_target)
def count_coordination(utterance_pair):
    """Compute per-marker coordination indicators for one utterance pair.

    For each of the first eight LIWC marker categories, the count is 1.0
    when the marker occurs in both utterances of the pair and 0.0 otherwise.
    Pairs whose tokenized lengths differ by 20 or more tokens are discarded.

    Returns a list of eight floats, or None when the pair is discarded.
    """
    second = all_utterances[utterance_pair[1]]["utterance"]
    first = all_utterances[utterance_pair[0]]["utterance"]
    second_counts = utils.get_liwc_counts_from_utterance(second)
    first_counts = utils.get_liwc_counts_from_utterance(first)
    second_tokens = utils.tokenize_utterance(second)
    first_tokens = utils.tokenize_utterance(first)
    # throw this conversation out if difference in utterance length is greater than 20
    length_gap = abs(len(second_tokens) - len(first_tokens))
    if length_gap >= 20:
        return None
    # A marker coordinates exactly when it appears in both utterances.
    return [
        1.0 if first_counts[marker] > 0 and second_counts[marker] > 0 else 0.0
        for marker in range(8)
    ]
def _format_to_model(self, text, context_length):
    """Format a raw utterance into the model's expected turn format.

    Tokenizes the text, prepends the speaker token selected by the parity
    of ``context_length`` (speakers alternate turn by turn), and appends
    the ``</s>`` end-of-turn marker.
    """
    tokenized = utils.tokenize_utterance(text)
    speaker = self.speaker_token[context_length % 2]
    formatted = '%s %s </s>' % (speaker, tokenized.strip().lower())
    return formatted