Esempio n. 1
0
    def get_response(self,
                     user_id='',
                     text='',
                     context=None,
                     article='',
                     **kwargs):
        """Return a retrieved (human-candidate) response and the updated context.

        Tokenizes and normalizes ``text``, appends it to ``context``, asks the
        retrieval model for the single best response from the cached response
        set, appends that response to the context, and returns both.

        :param user_id: identifier used only for logging.
        :param text: raw user utterance.
        :param context: mutable sequence of prior utterances; a fresh list is
            created when ``None`` (fixes AttributeError on the default call).
        :param article: unused here; kept for interface compatibility.
        :return: tuple ``(response, context)``.
        """
        # Bugfix: the original unconditionally called context.append(), which
        # raised AttributeError when the default context=None was used.
        if context is None:
            context = []
        logging.info('--------------------------------')
        # Lazy %-style args avoid formatting when the log level is disabled.
        logging.info('Generating DE (human) response for user %s.', user_id)
        text = utils.tokenize_utterance(text.strip().lower())
        context.append(text)
        logging.info('Using context: %s', ' '.join(list(context)))

        response_set_str = self.cached_retrieved_data['r']
        response_set_embs = self.cached_retrieved_data['r_embs']

        # Retrieve the single (k=1) best response for the joined context.
        cached_retrieved_data = self.model.retrieve(
            context_set=[' </s> '.join(list(context))],
            response_set=response_set_str,
            response_embs=response_set_embs,
            k=1,
            batch_size=1,
            verbose=False)
        response = cached_retrieved_data['r_retrieved'][0][0]

        # remove all tags to avoid having <unk>
        response = self._format_to_user(response)
        # add appropriate tags to the response in the context
        context.append(response)
        logging.info('Response: %s', response)
        return response, context
Esempio n. 2
0
def stylistic_features():
    """Build per-pair stylistic feature vectors and their labels.

    For each ``(pair, conversation)`` in the module-level ``speaker_pairs``
    mapping, the feature vector is: the LIWC marker counts for speaker x,
    the LIWC marker counts for speaker y, followed by the average utterance
    length (tokens per conversation turn) of each speaker.

    Labels come from the ``is_justice`` flags of the first exchange in the
    conversation: ``high`` when only x is a justice, ``low`` when only y is;
    pairs where both or neither are justices are dropped.

    :return: tuple ``(data, data_target)`` of feature vectors and labels.
    """
    data = []
    data_target = []
    # .items() replaces Python-2-only .iteritems(); behavior is identical on
    # Python 2 and makes the function Python-3 compatible.
    for pair, conversation in speaker_pairs.items():
        this_vector = []
        replies_x = get_replies(conversation, "x")
        replies_y = get_replies(conversation, "y")

        # Average tokens per turn; relies on `from __future__ import division`
        # (per the original author's note) for float division on Python 2.
        avg_x = len(utils.tokenize_utterance(replies_x)) / len(conversation)
        avg_y = len(utils.tokenize_utterance(replies_y)) / len(conversation)

        x_marker_count = utils.get_liwc_counts_from_utterance(replies_x)
        y_marker_count = utils.get_liwc_counts_from_utterance(replies_y)

        this_vector = this_vector + list(x_marker_count) + list(y_marker_count)
        this_vector.append(avg_x)
        this_vector.append(avg_y)

        # First exchange determines who the justice is, hence the label.
        x = all_utterances[conversation[0][0]]
        y = all_utterances[conversation[0][1]]
        if x["is_justice"] and not y["is_justice"]:
            label = high
        elif not x["is_justice"] and y["is_justice"]:
            label = low
        else:
            label = error

        if label != error:
            data.append(this_vector)
            data_target.append(label)
    return (data, data_target)
Esempio n. 3
0
def count_coordination(utterance_pair):
    """Count LIWC-marker coordination between two utterances.

    Looks up both utterances of ``utterance_pair`` in the module-level
    ``all_utterances`` table and, for each of the 8 LIWC marker categories,
    records 1.0 when both utterances use the marker and 0.0 otherwise.

    :param utterance_pair: pair of keys into ``all_utterances``.
    :return: list of 8 floats, or ``None`` when the two utterances differ in
        token length by 20 or more (the pair is discarded).
    """
    first_key, second_key = utterance_pair
    b_utterance = all_utterances[second_key]["utterance"]
    a_utterance = all_utterances[first_key]["utterance"]

    b_utter_vec = utils.get_liwc_counts_from_utterance(b_utterance)
    a_utter_vec = utils.get_liwc_counts_from_utterance(a_utterance)

    length_gap = abs(
        len(utils.tokenize_utterance(b_utterance))
        - len(utils.tokenize_utterance(a_utterance)))
    # Discard pairs whose utterance lengths differ by 20 tokens or more.
    if length_gap >= 20:
        return None

    # 1.0 where both speakers used the marker category, else 0.0.
    return [
        1.0 if a_utter_vec[marker_id] > 0 and b_utter_vec[marker_id] > 0
        else 0.0
        for marker_id in range(8)
    ]
Esempio n. 4
0
 def _format_to_model(self, text, context_length):
     """Tokenize, lowercase, and tag an utterance for the model.

     Prepends the speaker token chosen by the parity of ``context_length``
     and appends the ``</s>`` end-of-utterance marker.

     :param text: raw utterance string.
     :param context_length: number of prior context turns; its parity picks
         which of the two ``self.speaker_token`` entries to use.
     :return: formatted string ``"<speaker> <tokens> </s>"``.
     """
     speaker = self.speaker_token[context_length % 2]
     tokenized = utils.tokenize_utterance(text)
     return '%s %s </s>' % (speaker, tokenized.strip().lower())