Esempio n. 1
0
def test(data, model, params):
    """Greedily decode a reply for every test turn and save the results.

    Restores the latest checkpoint found in ``params.tmp_dir``, runs the
    encoder once per test input, then feeds the decoder one token at a time
    (starting from ``'<s>'``) until it emits ``'</s>'`` or
    ``params.utc_length`` steps have been taken.  Every decoded reply is
    printed and written to ``<params.tmp_dir>/test_result.txt``.

    Args:
        data: dataset object; uses ``get_batch``, ``convert``, ``word2id``,
            ``id2word``, ``num_test`` and the ``test_*`` attributes.
        model: graph wrapper exposing ``x_word``, ``x_api``, ``y_word_in``,
            ``encoder_initial_state``, ``encoder_last_state``,
            ``decoder_initial_state``, ``decoder_last_state``, ``probs`` and
            ``encoder_multi_cell``.
        params: configuration; uses ``tmp_dir`` and ``utc_length``.
    """
    print('Testing ...')
    # Context managers guarantee the session and the result file are released
    # even if restoring or decoding raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.latest_checkpoint(params.tmp_dir)
        print(ckpt)
        dl.optimistic_restore(sess, ckpt)
        # The gold decoder inputs/outputs returned by get_batch are not needed
        # for free-running decoding, so they are discarded.
        test_x_word, test_x_api, _, _ = data.get_batch(
            data.test_input_id, data.test_api_num, data.test_output_id)
        raw_str = dl.flatten_2D(data.test_usr)
        # Build and evaluate the zero state once: calling zero_state() inside
        # the loop would add new ops to the graph for every test item.
        encoder_zero_state = sess.run(
            model.encoder_multi_cell.zero_state(1, tf.float32))
        result_path = os.path.join(params.tmp_dir, 'test_result.txt')
        with open(result_path, 'w') as f:
            for i in range(data.num_test):
                x_raw_str = raw_str[i]
                x_word = [test_x_word[i]]
                x_api = [test_x_api[i]]
                # Run encoder just once; its last state seeds the decoder.
                state = sess.run(model.encoder_last_state, {
                    model.x_word: x_word,
                    model.encoder_initial_state: encoder_zero_state,
                })
                # Run decoder one step at a time, threading the state through.
                words = []
                word = '<s>'
                for _ in range(params.utc_length):
                    x = np.zeros([1, 1])
                    x[0, 0] = data.convert(word, data.word2id)
                    feed_dict = {
                        model.x_api: x_api,
                        model.y_word_in: x,
                        model.decoder_initial_state: state,
                    }
                    probs, state = sess.run(
                        [model.probs, model.decoder_last_state], feed_dict)
                    word = data.convert(np.argmax(probs[0]), data.id2word)
                    if word == '</s>':
                        break
                    words.append(word)
                # Every word is followed by a single space (trailing space
                # included), matching the original output format.
                answer = ''.join(w + ' ' for w in words)
                show_str = ('%d\t%s\n\t%s\n' % (i + 1, x_raw_str, answer))
                print(show_str)
                f.write('%s\n' % show_str)
    print('Testing ...')
Esempio n. 2
0
def test(data, model, params):
    """Greedily decode a reply for every test turn and save the results.

    Restores the latest checkpoint found in ``params.tmp_dir`` and, for each
    test input, repeatedly runs ``model.probs`` feeding the previously
    predicted token (starting from ``'<s>'``) until ``'</s>'`` is predicted
    or ``params.utc_length`` steps have been taken.  Every decoded reply is
    printed and written to ``<params.tmp_dir>/test_result.txt``.

    Args:
        data: dataset object; uses ``get_batch``, ``convert``, ``word2id``,
            ``id2word``, ``num_test`` and the ``test_*`` attributes.
        model: graph wrapper exposing ``x_word``, ``x_api``, ``y_word_in``,
            ``y_len``, ``dropout_keep`` and ``probs``.
        params: configuration; uses ``tmp_dir``, ``utc_length`` and
            ``gen_length``.
    """
    # Context managers guarantee the session and the result file are released
    # even if restoring or decoding raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.latest_checkpoint(params.tmp_dir)
        print(ckpt)
        dl.optimistic_restore(sess, ckpt)
        # The gold decoder inputs/outputs returned by get_batch are not needed
        # for free-running decoding, so they are discarded.
        test_x_word, test_x_api, _, _ = data.get_batch(
            data.test_input_id,
            data.test_api_num,
            data.test_output_id,
            len(data.test_input_id))

        raw_str = dl.flatten_2D(data.test_usr)
        result_path = os.path.join(params.tmp_dir, 'test_result.txt')
        with open(result_path, 'w') as f:
            for i in range(data.num_test):
                x_raw_str = raw_str[i]
                x_word = [test_x_word[i]]
                x_api = [test_x_api[i]]
                # NOTE(review): x_word is fed on every step and no state is
                # carried between steps, so the encoder is presumably
                # re-evaluated each time -- confirm against the model graph.
                words = []
                word = '<s>'
                for _ in range(params.utc_length):
                    x = np.zeros([1, 1])
                    x[0, 0] = data.convert(word, data.word2id)
                    feed_dict = {
                        model.x_word: x_word,
                        model.x_api: x_api,
                        model.y_word_in: x,
                        model.y_len: [params.gen_length],
                        # Dropout is disabled for inference.
                        model.dropout_keep: 1.0,
                    }
                    probs = sess.run(model.probs, feed_dict)
                    print(probs)
                    word = data.convert(np.argmax(probs[0]), data.id2word)
                    if word == '</s>':
                        break
                    words.append(word)
                # Every word is followed by a single space (trailing space
                # included), matching the original output format.
                answer = ''.join(w + ' ' for w in words)
                show_str = ('%d\t%s\n\t%s\n' % (i + 1, x_raw_str, answer))
                print(show_str)
                f.write('%s\n' % show_str)
Esempio n. 3
0
    def __init__(self, params):
        """Load all resources and build the id-encoded train/dev/test sets.

        Reads the vocabulary, KB values, dialogs and template labels through
        the ``dl`` helpers, merges and flattens the dialog history of every
        turn, encodes it as word ids, and stores the inputs and API result
        counts flattened across dialogs.

        Args:
            params: configuration object; the attributes read here are
                ``batch_size``, ``vocab_size``, ``utc_length``, ``turn_num``,
                ``act_size``, ``vocab_path``, ``kb_path``, ``template_path``
                and ``train_path`` / ``dev_path`` / ``test_path``.
        """
        self.batch_size = params.batch_size
        self.vocab_size = params.vocab_size
        self.utc_length = params.utc_length
        self.turn_num = params.turn_num
        self.act_size = params.act_size
        # Act names; list position is the act id.
        self.id2act = [
            'you_are_welcome',
            'request_food',
            'api',
            'reservation',
            'hello',
            'inform_address',
            'inform_phone',
            'request_number',
            'on_it',
            'any_help',
            'find_options',
            'update',
            'another_option',
            'recommend',
            'request_area',
            'request_price',
        ]

        vocab = dl.read_word2id(params.vocab_path, params.vocab_size)
        self.word2id, self.id2word = vocab
        kb = dl.read_kb_value(params.kb_path)
        self.names, self.values, self.val2attr, self.entities = kb

        # Raw dialogs, then template labels, in train / dev / test order.
        self.train_usr, self.train_sys, train_api = dl.read_dialog(
            params.train_path)
        self.dev_usr, self.dev_sys, dev_api = dl.read_dialog(params.dev_path)
        self.test_usr, self.test_sys, test_api = dl.read_dialog(
            params.test_path)
        self.train_label, self.dev_label, self.test_label = [
            dl.get_template_label(path, params.template_path, params.kb_path)
            for path in (params.train_path, params.dev_path, params.test_path)
        ]

        # Merge the history turns (params.turn_num decides how many), then
        # flatten all history of a turn into a single string.
        merged = [
            dl.merge_dialog(usr, sys_utc, params.turn_num)
            for usr, sys_utc in ((self.train_usr, self.train_sys),
                                 (self.dev_usr, self.dev_sys),
                                 (self.test_usr, self.test_sys))
        ]
        train_text, dev_text, test_text = [
            dl.flatten_history(history) for history in merged
        ]

        # Convert the strings to indexes.
        history_len = params.turn_num * params.utc_length

        def encode_history(text):
            return dl.convert_2D_str2id(text,
                                        self.word2id,
                                        self.names,
                                        self.val2attr,
                                        history_len,
                                        back=True)

        encoded = [
            encode_history(text) for text in (train_text, dev_text, test_text)
        ]

        # Get number of restaurant in api_call result.
        api_counts = [
            dl.get_api_number(api, text)
            for api, text in ((train_api, train_text),
                              (dev_api, dev_text),
                              (test_api, test_text))
        ]

        # Flatten the 2D lists to 1D (merge all dialogs into a single list).
        self.train_input_id, self.dev_input_id, self.test_input_id = [
            dl.flatten_2D(ids) for ids in encoded
        ]
        self.train_api_num, self.dev_api_num, self.test_api_num = [
            dl.flatten_2D(counts) for counts in api_counts
        ]

        self.num_train = len(self.train_input_id)
        self.num_dev = len(self.dev_input_id)
        self.num_test = len(self.test_input_id)

        sizes = (self.num_train, self.num_dev, self.num_test)
        print('\tNumber of turns: train: %d, dev: %d, test: %d' % sizes)
        self._pointer = 0
Esempio n. 4
0
def test(data, model, params):
    """Evaluate act classification on the dev or test split and save reports.

    Restores the latest checkpoint found in ``params.tmp_dir``, runs the
    model once over the whole chosen split, prints the error rate and loss,
    and writes three files into ``params.tmp_dir``:

    * ``test_result_true.txt``  -- correctly classified turns
    * ``test_result_false.txt`` -- misclassified turns
    * ``test_confusion_matrix.txt`` -- the confusion matrix

    Args:
        data: dataset object; uses ``get_api_vector``, ``get_act_vector``,
            ``id2act`` and the ``dev_*`` / ``test_*`` attributes.
        model: graph wrapper exposing ``x_word``, ``x_api``, ``y_act``,
            ``dropout_keep``, ``loss`` and ``prob``.
        params: configuration; uses ``tmp_dir``, ``act_size`` and
            ``data_opt`` (truthy selects the dev split, falsy the test split).
    """
    # Context managers guarantee the session and all three report files are
    # released even if evaluation raises part-way through.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.latest_checkpoint(params.tmp_dir)
        print(ckpt)
        dl.optimistic_restore(sess, ckpt)

        true_path = os.path.join(params.tmp_dir, 'test_result_true.txt')
        false_path = os.path.join(params.tmp_dir, 'test_result_false.txt')
        cm_path = os.path.join(params.tmp_dir, 'test_confusion_matrix.txt')
        with open(true_path, 'w') as f_true, \
                open(false_path, 'w') as f_false, \
                open(cm_path, 'w') as f_cm:
            if params.data_opt:
                split_name = 'Dev'
                x_api = data.get_api_vector(data.dev_api_num)
                x_word = data.dev_input_id
                y_act = data.get_act_vector(data.dev_label)
                label = data.dev_label
                usr_utc_list = data.dev_usr
                sys_utc_list = data.dev_sys
                api_list = data.dev_api_num
            else:
                split_name = 'Test'
                x_api = data.get_api_vector(data.test_api_num)
                x_word = data.test_input_id
                y_act = data.get_act_vector(data.test_label)
                label = data.test_label
                usr_utc_list = data.test_usr
                sys_utc_list = data.test_sys
                api_list = data.test_api_num

            feed_dict = {
                model.x_word: x_word,
                model.x_api: x_api,
                model.y_act: y_act,
                # Dropout is disabled for evaluation.
                model.dropout_keep: 1.0
            }
            # The split name follows params.data_opt; the original indexed a
            # list with the undefined name `data_opt` and raised NameError.
            print('Size of %s Set: %d, %d, %d' % (split_name, len(x_word),
                                                  len(x_api), len(y_act)))
            loss, prob = sess.run([model.loss, model.prob],
                                  feed_dict=feed_dict)
            usr_utc_list = dl.flatten_2D(usr_utc_list)
            sys_utc_list = dl.flatten_2D(sys_utc_list)
            pred_list = np.zeros(len(label))
            true_list = np.asarray(label)
            error = 0
            for i, gold in enumerate(label):
                act_id = np.argmax(prob[i])
                string = '%s %s\t#%s\t#%s\t%d\n' % (
                    data.id2act[act_id], data.id2act[gold], usr_utc_list[i],
                    sys_utc_list[i], api_list[i])
                if act_id == gold:
                    f_true.write(string)
                else:
                    error += 1
                    f_false.write(string)
                pred_list[i] = act_id
            print('Error rate: %.3f' % (error * 1.0 / len(label)))
            print('Loss: %.3f' % loss)
            cm = confusion_matrix(true_list, pred_list)

            # Save the confusion matrix
            # NOTE(review): only the first act_size - 1 rows/columns are
            # written, and cm[i, j] assumes the matrix is at least that
            # large -- confirm both against the label set actually observed.
            f_cm.write('\t\t\t')
            for i in range(2, params.act_size + 1):
                f_cm.write('%5d\t' % i)
            f_cm.write('\n')
            for i in range(params.act_size - 1):
                f_cm.write('%10s\t' % data.id2act[i][:10])
                for j in range(params.act_size - 1):
                    f_cm.write('%5d\t' % cm[i, j])
                f_cm.write('\n')
Esempio n. 5
0
def test_on_trainset(data, model, params):
    """Evaluate act classification on the training split and save reports.

    Restores the latest checkpoint found in ``params.tmp_dir``, runs the
    model once over the whole training set, prints the error rate, and
    writes three files into ``params.tmp_dir``:

    * ``train_result_true.txt``  -- correctly classified turns
    * ``train_result_false.txt`` -- misclassified turns
    * ``train_confusion_matrix.txt`` -- the confusion matrix

    Args:
        data: dataset object; uses ``get_api_vector``, ``get_act_vector``,
            ``id2act`` and the ``train_*`` attributes.
        model: graph wrapper exposing ``x_word``, ``x_api``, ``y_act``,
            ``dropout_keep`` and ``prob``.
        params: configuration; uses ``tmp_dir``, ``train_size`` and
            ``act_size``.
    """
    # Context managers guarantee the session and all three report files are
    # released even if evaluation raises part-way through.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.latest_checkpoint(params.tmp_dir)
        print(ckpt)
        dl.optimistic_restore(sess, ckpt)

        true_path = os.path.join(params.tmp_dir, 'train_result_true.txt')
        false_path = os.path.join(params.tmp_dir, 'train_result_false.txt')
        cm_path = os.path.join(params.tmp_dir, 'train_confusion_matrix.txt')
        with open(true_path, 'w') as f_true, \
                open(false_path, 'w') as f_false, \
                open(cm_path, 'w') as f_cm:
            train_api = data.get_api_vector(data.train_api_num)
            train_act = data.get_act_vector(data.train_label)
            train_feed_dict = {
                model.x_word: data.train_input_id,
                model.x_api: train_api,
                model.y_act: train_act,
                # Dropout is disabled for evaluation.
                model.dropout_keep: 1.0
            }
            print('Size of Train Set: %d %d %d' % (
                len(data.train_input_id), len(train_api), len(train_act)))
            prob = sess.run(model.prob, feed_dict=train_feed_dict)
            train_usr = dl.flatten_2D(data.train_usr)
            train_sys = dl.flatten_2D(data.train_sys)
            # NOTE(review): predictions are sized by params.train_size while
            # the gold labels use len(data.train_label) -- presumably these
            # are equal; confirm, otherwise the two arrays disagree.
            pred_list = np.zeros(params.train_size)
            true_list = np.asarray(data.train_label)
            error = 0
            for i in range(params.train_size):
                act_id = np.argmax(prob[i])
                gold = data.train_label[i]
                string = '%s %s\t#%s\t#%s\t%d\n' % (
                    data.id2act[act_id], data.id2act[gold],
                    train_usr[i], train_sys[i], data.train_api_num[i])
                if act_id == gold:
                    f_true.write(string)
                else:
                    error += 1
                    f_false.write(string)
                pred_list[i] = act_id
            print('Error rate: %.3f' % (error * 1.0 / params.train_size))
            cm = confusion_matrix(true_list, pred_list)

            # Save the confusion matrix
            # NOTE(review): the header row has act_size - 1 entries (2 ..
            # act_size) while each data row has act_size columns -- one of
            # the two ranges looks off by one; confirm the intended layout.
            f_cm.write('\t\t\t')
            for i in range(2, params.act_size + 1):
                f_cm.write('%5d\t' % i)
            f_cm.write('\n')
            for i in range(params.act_size):
                f_cm.write('%10s\t' % data.id2act[i][:10])
                for j in range(params.act_size):
                    f_cm.write('%5d\t' % cm[i, j])
                f_cm.write('\n')
Esempio n. 6
0
    def __init__(self, params):
        """Load all resources and build the id-encoded train/dev/test sets.

        Reads the vocabulary, KB values and dialogs through the ``dl``
        helpers, merges and flattens the dialog history of every turn,
        encodes both the history (inputs) and the system utterances
        (outputs) as word ids, and stores inputs, outputs and API result
        counts flattened across dialogs.

        Args:
            params: configuration object; the attributes read here are
                ``batch_size``, ``vocab_size``, ``utc_length``, ``turn_num``,
                ``vocab_path``, ``kb_path`` and ``train_path`` /
                ``dev_path`` / ``test_path``.
        """
        self.batch_size = params.batch_size
        self.vocab_size = params.vocab_size
        self.utc_length = params.utc_length
        self.turn_num = params.turn_num

        vocab = dl.read_word2id(params.vocab_path, params.vocab_size)
        self.word2id, self.id2word = vocab
        kb = dl.read_kb_value(params.kb_path)
        self.names, self.values, self.val2attr, self.entities = kb

        # Raw dialogs in train / dev / test order.
        self.train_usr, self.train_sys, train_api = dl.read_dialog(
            params.train_path)
        self.dev_usr, self.dev_sys, dev_api = dl.read_dialog(params.dev_path)
        self.test_usr, self.test_sys, test_api = dl.read_dialog(
            params.test_path)

        # Merge the history turns (params.turn_num decides how many), then
        # flatten all history of a turn into a single string.
        merged = [
            dl.merge_dialog(usr, sys_utc, params.turn_num)
            for usr, sys_utc in ((self.train_usr, self.train_sys),
                                 (self.dev_usr, self.dev_sys),
                                 (self.test_usr, self.test_sys))
        ]
        train_text, dev_text, test_text = [
            dl.flatten_history(history) for history in merged
        ]

        # Convert the strings to indexes: inputs first, then outputs.
        history_len = params.turn_num * params.utc_length

        def encode_history(text):
            return dl.convert_2D_str2id(text,
                                        self.word2id,
                                        self.names,
                                        self.val2attr,
                                        history_len,
                                        back=True)

        def encode_reply(utterances):
            return dl.convert_2D_str2id(utterances,
                                        self.word2id,
                                        self.names,
                                        self.val2attr,
                                        params.utc_length,
                                        add_headrear=True)

        encoded_inputs = [
            encode_history(text) for text in (train_text, dev_text, test_text)
        ]
        encoded_outputs = [
            encode_reply(utterances)
            for utterances in (self.train_sys, self.dev_sys, self.test_sys)
        ]

        # Get number of restaurant in api_call result.
        api_counts = [
            dl.get_api_number(api, text)
            for api, text in ((train_api, train_text),
                              (dev_api, dev_text),
                              (test_api, test_text))
        ]

        # Flatten the 2D lists to 1D (merge all dialogs into a single list).
        self.train_input_id, self.dev_input_id, self.test_input_id = [
            dl.flatten_2D(ids) for ids in encoded_inputs
        ]
        self.train_output_id, self.dev_output_id, self.test_output_id = [
            dl.flatten_2D(ids) for ids in encoded_outputs
        ]
        self.train_api_num, self.dev_api_num, self.test_api_num = [
            dl.flatten_2D(counts) for counts in api_counts
        ]

        self.num_train = len(self.train_input_id)
        self.num_dev = len(self.dev_input_id)
        self.num_test = len(self.test_input_id)

        sizes = (self.num_train, self.num_dev, self.num_test)
        print('\tNumber of turns: train: %d, dev: %d, test: %d' % sizes)
        self._pointer = 0