Example #1
        def dev_step():
            results = defaultdict(list)
            num_test = 0
            num_correct = 0.0
            valid_batches = data_helpers.batch_iter(valid_dataset,
                                                    FLAGS.batch_size,
                                                    1,
                                                    target_loss_weight,
                                                    FLAGS.max_utter_len,
                                                    FLAGS.max_utter_num,
                                                    FLAGS.max_response_len,
                                                    charVocab,
                                                    FLAGS.max_word_length,
                                                    shuffle=True)
            for valid_batch in valid_batches:
                x_utterances, x_response, x_utterances_len, x_response_len, x_utters_num, x_target, x_target_weight, id_pairs, x_u_char, x_u_char_len, x_r_char, x_r_char_len = valid_batch
                feed_dict = {
                    imn.utterances: x_utterances,
                    imn.response: x_response,
                    imn.utterances_len: x_utterances_len,
                    imn.response_len: x_response_len,
                    imn.utters_num: x_utters_num,
                    imn.target: x_target,
                    imn.target_loss_weight: x_target_weight,
                    imn.dropout_keep_prob: 1.0,
                    imn.u_charVec: x_u_char,
                    imn.u_charLen: x_u_char_len,
                    imn.r_charVec: x_r_char,
                    imn.r_charLen: x_r_char_len,
                }
                batch_accuracy, predicted_prob = sess.run(
                    [imn.accuracy, imn.probs], feed_dict)
                num_test += len(predicted_prob)
                if num_test % 1000 == 0:
                    print(num_test)

                num_correct += len(predicted_prob) * batch_accuracy
                for i, prob_score in enumerate(predicted_prob):
                    question_id, response_id, label = id_pairs[i]
                    results[question_id].append(
                        (response_id, label, prob_score))

            #calculate top-1 precision
            print('num_test_samples: {}  test_accuracy: {}'.format(
                num_test, num_correct / num_test))
            accu, precision, recall, f1, loss = metrics.classification_metrics(
                results)
            print('Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.
                  format(accu, precision, recall, f1, loss))

            mvp = metrics.mean_average_precision(results)
            mrr = metrics.mean_reciprocal_rank(results)
            top_1_precision = metrics.top_1_precision(results)
            total_valid_query = metrics.get_num_valid_query(results)
            print(
                'MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
                .format(mvp, mrr, top_1_precision, total_valid_query))

            return mrr
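        # metrics.mean_reciprocal_rank is not shown in this snippet. A minimal,
        # hypothetical sketch of how it could be computed over the `results` dict
        # built above (question_id -> list of (response_id, label, prob) tuples):
        def mean_reciprocal_rank_sketch(results):
            reciprocal_ranks = []
            for candidates in results.values():
                # rank candidates by predicted probability, highest first
                ranked = sorted(candidates, key=lambda c: c[2], reverse=True)
                for rank, (_, label, _) in enumerate(ranked, start=1):
                    if label > 0:  # first relevant response determines the rank
                        reciprocal_ranks.append(1.0 / rank)
                        break
            return sum(reciprocal_ranks) / len(reciprocal_ranks)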
Example #2
        def dev_step():
            results = defaultdict(list)
            num_test = 0
            num_correct = 0.0
            valid_batches = data_helpers.batch_iter(valid_dataset, FLAGS.batch_size, 1, FLAGS.max_utter_num, FLAGS.max_utter_len, \
                                      FLAGS.max_response_num, FLAGS.max_response_len, FLAGS.max_persona_num, FLAGS.max_persona_len, \
                                      charVocab, FLAGS.max_word_length, shuffle=True)
            for valid_batch in valid_batches:
                x_utterances, x_utterances_len, x_response, x_response_len, \
                    x_utters_num, x_target, x_ids, \
                    x_u_char, x_u_char_len, x_r_char, x_r_char_len, \
                    x_personas, x_personas_len, x_p_char, x_p_char_len, x_personas_num = valid_batch
                feed_dict = {
                  dim.utterances: x_utterances,
                  dim.utterances_len: x_utterances_len,
                  dim.responses: x_response,
                  dim.responses_len: x_response_len,
                  dim.utters_num: x_utters_num,
                  dim.target: x_target,
                  dim.dropout_keep_prob: 1.0,
                  dim.u_charVec: x_u_char,
                  dim.u_charLen: x_u_char_len,
                  dim.r_charVec: x_r_char,
                  dim.r_charLen: x_r_char_len,
                  dim.personas: x_personas,
                  dim.personas_len: x_personas_len,
                  dim.p_charVec: x_p_char,
                  dim.p_charLen: x_p_char_len,
                  dim.personas_num: x_personas_num
                }
                batch_accuracy, predicted_prob = sess.run([dim.accuracy, dim.probs], feed_dict)

                num_test += len(predicted_prob)
                if num_test % 1000 == 0:
                    print(num_test)
                num_correct += len(predicted_prob) * batch_accuracy

                # predicted_prob = [batch_size, max_response_num]
                for i in range(len(predicted_prob)):
                    probs = predicted_prob[i]
                    us_id = x_ids[i]
                    label = x_target[i]
                    labels = np.zeros(FLAGS.max_response_num)
                    labels[label] = 1
                    for r_id, prob in enumerate(probs):
                        results[us_id].append((str(r_id), labels[r_id], prob))

            #calculate top-1 precision
            print('num_test_samples: {}  test_accuracy: {}'.format(num_test, num_correct/num_test))
            accu, precision, recall, f1, loss = metrics.classification_metrics(results)
            print('Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.format(accu, precision, recall, f1, loss))

            mvp = metrics.mean_average_precision(results)
            mrr = metrics.mean_reciprocal_rank(results)
            top_1_precision = metrics.top_1_precision(results)
            total_valid_query = metrics.get_num_valid_query(results)
            print('MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'.format(mvp, mrr, top_1_precision, total_valid_query))

            return mrr
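        # metrics.top_1_precision is not included in this snippet either. A rough,
        # hypothetical sketch over the `results` dict built above (each value is a
        # list of (response_id, label, prob) tuples):
        def top_1_precision_sketch(results):
            num_hit = 0
            for candidates in results.values():
                # candidate with the highest predicted probability
                best = max(candidates, key=lambda c: c[2])
                if best[1] > 0:
                    num_hit += 1
            return float(num_hit) / len(results)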
Example #3
        def dev_step():
            results = []
            num_test = 0
            num_correct = 0.0
            valid_batches = data_helpers.batch_iter(valid_dataset, FLAGS.batch_size, 1, SEQ_LEN, shuffle=True)
            for valid_batch in valid_batches:
                x_question, x_answer, x_question_len, x_answer_len, x_lastTurn, x_lastTurn_len, q_id_list, as_id_list, x_target = valid_batch
                feed_dict = {
                  esim.question: x_question,
                  esim.answer: x_answer,
                  esim.question_len: x_question_len,
                  esim.answer_len: x_answer_len,
                  esim.target: x_target,
                  esim.dropout_keep_prob: 1.0,
                  esim.lastTurn: x_lastTurn,
                  esim.lastTurn_len: x_lastTurn_len
                }
                batch_accuracy, predicted_prob = sess.run([esim.accuracy, esim.probs], feed_dict)
                num_test += len(predicted_prob)
                if num_test % 10 == 0:
                    print(num_test)

                num_correct += len(predicted_prob) * batch_accuracy
                results.append((predicted_prob, x_target))

            probs_list = []
            labels_list = [] 
            for probs, labels in results:
                probs_list.append(probs)
                labels_list.append(labels)
            probs_aggre = np.concatenate(probs_list, axis=0)
            labels_aggre = np.concatenate(labels_list, axis=0)

            #calculate top-1 precision
            print('num_test_samples: {}  test_accuracy: {}'.format(num_test, num_correct/num_test))
            recall, mrr = recall_metrics.compute_recall(probs_aggre, labels_aggre)
            print('recall@1: {}, recall@2: {}, recall@5: {}, recall@10: {}'.format(recall['@1'], recall['@2'], recall['@5'], recall['@10']))
            
            return recall['@1']
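        # recall_metrics.compute_recall is not part of this snippet. Assuming the
        # flat probs/labels arrays are grouped into fixed-size candidate pools per
        # query (pool size 10 here is an assumption), recall@k could look roughly
        # like this; the real function also returns MRR:
        def compute_recall_sketch(probs, labels, pool_size=10):
            recall = {'@1': 0.0, '@2': 0.0, '@5': 0.0, '@10': 0.0}
            num_queries = len(probs) // pool_size
            for q in range(num_queries):
                pool_probs = probs[q * pool_size:(q + 1) * pool_size]
                pool_labels = labels[q * pool_size:(q + 1) * pool_size]
                # candidate indices ranked by probability, highest first
                order = np.argsort(pool_probs)[::-1]
                for k in (1, 2, 5, 10):
                    if np.any(pool_labels[order[:k]] > 0):
                        recall['@%d' % k] += 1.0
            for k in recall:
                recall[k] /= num_queries
            return recall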
Example #4
        def check_step(dataset, shuffle=False):
            results = defaultdict(list)
            num_test = 0
            num_correct = 0.0
            batches = data_helpers.batch_iter(dataset,
                                              FLAGS.batch_size,
                                              1,
                                              idf,
                                              SEQ_LEN,
                                              shuffle=shuffle)
            for batch in batches:
                x_premise, x_hypothesis, x_premise_len, x_hypothesis_len, \
                targets, extra_feature, p_features, h_features = batch
                feed_dict = {
                    esim.premise: x_premise,
                    esim.hypothesis: x_hypothesis,
                    esim.premise_len: x_premise_len,
                    esim.hypothesis_len: x_hypothesis_len,
                    esim.target: targets,
                    esim.dropout_keep_prob: 1.0,
                    esim.extra_feature: extra_feature,
                    esim.p_word_feature: p_features,
                    esim.h_word_feature: h_features
                }
                batch_accuracy, predicted_prob = sess.run(
                    [esim.accuracy, esim.probs], feed_dict)
                num_test += len(predicted_prob)
                if num_test % 1000 == 0:
                    print(num_test)

                num_correct += len(predicted_prob) * batch_accuracy
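                # batch_accuracy is a per-batch mean, so weighting it by the batch
                # size above and dividing by num_test at the end yields an accuracy
                # averaged over samples even when the final batch is smaller.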

            # calculate Accuracy
            acc = num_correct / num_test
            print('num_test_samples: {}  accuracy: {}'.format(num_test, acc))

            return acc
Example #5
        r_char_feature = graph.get_operation_by_name(
            "response_char").outputs[0]
        r_char_len = graph.get_operation_by_name(
            "response_char_len").outputs[0]

        # Tensors we want to evaluate
        prob = graph.get_operation_by_name("prediction_layer/prob").outputs[0]

        results = defaultdict(list)
        num_test = 0
        test_batches = data_helpers.batch_iter(test_dataset,
                                               FLAGS.batch_size,
                                               1,
                                               target_loss_weight,
                                               FLAGS.max_utter_len,
                                               FLAGS.max_utter_num,
                                               FLAGS.max_response_len,
                                               charVocab,
                                               FLAGS.max_word_length,
                                               shuffle=False)
        for test_batch in test_batches:
            x_utterances, x_response, x_utterances_len, x_response_len, x_utters_num, \
            x_target, x_target_weight, id_pairs, \
            x_u_char, x_u_char_len, x_r_char, x_r_char_len = test_batch
            feed_dict = {
                utterances: x_utterances,
                response: x_response,
                utterances_len: x_utterances_len,
                response_len: x_response_len,
                utterances_num: x_utters_num,
                dropout_keep_prob: 1.0,
Example #6
            print('Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.
                  format(accu, precision, recall, f1, loss))

            mvp = metrics.mean_average_precision(results)
            mrr = metrics.mean_reciprocal_rank(results)
            top_1_precision = metrics.top_1_precision(results)
            total_valid_query = metrics.get_num_valid_query(results)
            print(
                'MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
                .format(mvp, mrr, top_1_precision, total_valid_query))

            return mrr

        best_mrr = 0.0
        batches = data_helpers.batch_iter(train_dataset, FLAGS.batch_size, FLAGS.num_epochs, FLAGS.max_utter_num, FLAGS.max_utter_len, \
                                          FLAGS.max_response_num, FLAGS.max_response_len, FLAGS.max_persona_num, FLAGS.max_persona_len, \
                                          charVocab, FLAGS.max_word_length, shuffle=True)
        for batch in batches:
            x_utterances, x_utterances_len, x_response, x_response_len, \
                x_utters_num, x_target, x_ids, \
                x_u_char, x_u_char_len, x_r_char, x_r_char_len, \
                x_personas, x_personas_len, x_p_char, x_p_char_len, x_personas_num = batch
            train_step(x_utterances, x_utterances_len, x_response,
                       x_response_len, x_utters_num, x_target, x_ids, x_u_char,
                       x_u_char_len, x_r_char, x_r_char_len, x_personas,
                       x_personas_len, x_p_char, x_p_char_len, x_personas_num)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                valid_mrr = dev_step()
                if valid_mrr > best_mrr:
Example #7
        personas = graph.get_operation_by_name("personas").outputs[0]
        personas_len = graph.get_operation_by_name("personas_len").outputs[0]
        personas_num = graph.get_operation_by_name("personas_num").outputs[0]
        p_char_feature = graph.get_operation_by_name("personas_char").outputs[0]
        p_char_len = graph.get_operation_by_name("personas_char_len").outputs[0]
        
        dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
        
        # Tensors we want to evaluate
        pred_prob = graph.get_operation_by_name("prediction_layer/prob").outputs[0]

        results = defaultdict(list)
        num_test = 0
        test_batches = data_helpers.batch_iter(test_dataset, FLAGS.batch_size, 1, FLAGS.max_utter_num, FLAGS.max_utter_len, \
            FLAGS.max_response_num, FLAGS.max_response_len, FLAGS.max_persona_num, FLAGS.max_persona_len, \
            charVocab, FLAGS.max_word_length, shuffle=False)

        for test_batch in test_batches:
            x_utterances, x_utterances_len, x_utterances_num, x_u_char, x_u_char_len, \
              x_response, x_response_len, x_r_char, x_r_char_len, \
              x_personas, x_personas_len, x_personas_num, x_p_char, x_p_char_len, \
              x_target, x_ids = test_batch
            feed_dict = {
                utterances: x_utterances,
                utterances_len: x_utterances_len,
                responses: x_response,
                responses_len: x_response_len,
                utterances_num: x_utterances_num,
                dropout_keep_prob: 1.0,
                u_char_feature: x_u_char,
Example #8
        question_len_x = graph.get_operation_by_name("question_len").outputs[0]
        answer_len_x = graph.get_operation_by_name("answer_len").outputs[0]
        lastTurn_len_x = graph.get_operation_by_name("lastTurn_len").outputs[0]

        dropout_keep_prob = graph.get_operation_by_name(
            "dropout_keep_prob").outputs[0]

        # Tensors we want to evaluate
        prob = graph.get_operation_by_name("convolution-1/prob").outputs[0]

        results = []
        num_test = 0
        test_batches = data_helpers.batch_iter(test_dataset,
                                               FLAGS.batch_size,
                                               1,
                                               SEQ_LEN,
                                               shuffle=False)
        for test_batch in test_batches:
            x_question, x_answer, x_question_len, x_answer_len, x_lastTurn, x_lastTurn_len, x_q_id_list, x_as_id_list, x_target = test_batch
            feed_dict = {
                question_x: x_question,
                answer_x: x_answer,
                lastTurn_x: x_lastTurn,
                question_len_x: x_question_len,
                answer_len_x: x_answer_len,
                lastTurn_len_x: x_lastTurn_len,
                dropout_keep_prob: 1.0
            }
            predicted_prob = sess.run(prob, feed_dict)
            num_test += len(predicted_prob)
Example #9
        def check_step(dataset, shuffle, is_test=False, path=None):
            results = defaultdict(list)
            num_test = 0
            num_correct = 0.0
            conv_correct = 0.0
            if is_test:
                file = open(path, 'w')
            valid_batches = data_helpers.batch_iter(dataset,
                                                    FLAGS.batch_size,
                                                    1,
                                                    target_loss_weight,
                                                    FLAGS.max_utter_len,
                                                    FLAGS.max_utter_num,
                                                    charVocab,
                                                    FLAGS.max_word_length,
                                                    shuffle=shuffle)
            for valid_batch in valid_batches:
                x_utterances, x_utterances_len, x_utterances_num, x_utterances_char, x_utterances_char_len, x_target, x_id, x_target_weight, dialogue_label = valid_batch
                feed_dict = {
                    model.utterances: x_utterances,
                    model.utterances_len: x_utterances_len,
                    model.utterances_num: x_utterances_num,
                    model.target: x_target,
                    model.target_loss_weight: x_target_weight,
                    model.dropout_keep_prob: 1.0,
                    model.u_charVec: x_utterances_char,
                    model.u_charLen: x_utterances_char_len,
                    model.dialogue_label: dialogue_label,
                }

                batch_accuracy, predicted_prob, conv_acc = sess.run(
                    [model.accuracy, model.probs, model.conv_acc], feed_dict)
                num_test += len(predicted_prob)

                if num_test % 100000 == 0:
                    print(num_test)

                # method 1: accumulate the model's conversation-level accuracy
                conv_correct += len(dialogue_label) * conv_acc

                # method 2: a conversation counts as correct only if every
                # utterance-level prediction matches its target
                predicted_target = np.argmax(
                    predicted_prob, axis=2)  # [batch_size, max_utter_num]
                for i in range(len(predicted_prob)):
                    i_utterances_num = x_utterances_num[i]
                    i_predicted_target = predicted_target[i][:i_utterances_num]
                    i_target = x_target[i][:i_utterances_num]
                    if np.sum((i_predicted_target == i_target
                               ).astype(int)) == x_utterances_num[i]:
                        num_correct += 1
                if is_test:
                    for i in range(len(x_id)):
                        x_id_ = x_id[i]
                        i_utterances_num = x_utterances_num[i]
                        for j in range(i_utterances_num):
                            i_predicted_target = predicted_target[i][j]
                            i_target = x_target[i][j]
                            file.write(str(x_id_))
                            file.write('\t')
                            file.write(str(i_utterances_num))
                            file.write('\t')
                            file.write(str(i_predicted_target))
                            file.write('\t')
                            file.write(str(i_target))
                            file.write('\n')

            # calculate Accuracy
            acc = num_correct / num_test
            cov_acc = conv_correct / num_test
            print('num_test_samples: {}  accuracy: {} \n'.format(
                num_test, acc))
            print('conversation accuracy: {} \n'.format(cov_acc))
            if is_test:
                file.close()
            return acc
Example #10
            acc = num_correct / num_test
            cov_acc = conv_correct / num_test
            print('num_test_samples: {}  accuracy: {} \n'.format(
                num_test, acc))
            print('conversation accuracy: {} \n'.format(cov_acc))
            if is_test:
                file.close()
            return acc

        EPOCH = 0
        best_acc = 0.0
        batches = data_helpers.batch_iter(train_dataset,
                                          FLAGS.batch_size,
                                          FLAGS.num_epochs,
                                          target_loss_weight,
                                          FLAGS.max_utter_len,
                                          FLAGS.max_utter_num,
                                          charVocab,
                                          FLAGS.max_word_length,
                                          shuffle=True)
        for batch in batches:
            x_utterances, x_utterances_len, x_utterances_num, x_utterances_char, x_utterances_char_len, x_target, x_id, x_target_weight, dialogue_label = batch
            train_step(x_utterances, x_utterances_len, x_utterances_num,
                       x_utterances_char, x_utterances_char_len, x_target,
                       x_id, x_target_weight, dialogue_label)
            current_step = tf.train.global_step(sess, global_step)
            if current_step == 10000:
                train_op = train_op2
                print('change to train_op2')
            if current_step % FLAGS.evaluate_every == 0:
                EPOCH += 1
Example #11
                if num_test % 1000 == 0:
                    print(num_test)

                num_correct += len(predicted_prob) * batch_accuracy

            # calculate Accuracy
            acc = num_correct / num_test
            print('num_test_samples: {}  accuracy: {}'.format(num_test, acc))

            return acc

        best_acc = 0.0
        EPOCH = 0
        batches = data_helpers.batch_iter(train_dataset,
                                          FLAGS.batch_size,
                                          FLAGS.num_epochs,
                                          idf,
                                          SEQ_LEN,
                                          shuffle=True)
        for batch in batches:
            x_premise, x_hypothesis, x_premise_len, x_hypothesis_len, \
            targets, extra_feature, p_features, h_features = batch
            train_step(x_premise, x_hypothesis, x_premise_len,
                       x_hypothesis_len, targets, extra_feature, p_features,
                       h_features)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                EPOCH += 1
                print("\nEPOCH: {}".format(EPOCH))
                print("Evaluation on dev:")
                valid_acc = check_step(dev_dataset, shuffle=True)
                print("\nEvaluation on test:")
Example #12
                    results[question_id].append((response_id, label, prob_score))

            #calculate top-1 precision
            print('num_test_samples: {}  test_accuracy: {}'.format(num_test, num_correct/num_test))
            accu, precision, recall, f1, loss = metrics.classification_metrics(results)
            print('Accuracy: {}, Precision: {}  Recall: {}  F1: {} Loss: {}'.format(accu, precision, recall, f1, loss))

            mvp = metrics.mean_average_precision(results)
            mrr = metrics.mean_reciprocal_rank(results)
            top_1_precision = metrics.top_1_precision(results)
            total_valid_query = metrics.get_num_valid_query(results)
            print('MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'.format(mvp, mrr, top_1_precision, total_valid_query))

            return mrr

        best_mrr = 0.0
        batches = data_helpers.batch_iter(train_dataset, FLAGS.batch_size, FLAGS.num_epochs, target_loss_weight, FLAGS.max_utter_len, FLAGS.max_utter_num, FLAGS.max_response_len, shuffle=True)
        for batch in batches:
            x_utterances, x_response, x_utterances_len, x_response_len, x_utters_num, x_target, x_target_weight, id_pairs = batch
            train_step(x_utterances, x_response, x_utterances_len, x_response_len, x_utters_num, x_target, x_target_weight, id_pairs)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                valid_mrr = dev_step()
                if valid_mrr > best_mrr:
                    best_mrr = valid_mrr
                    test_mrr = test_step()
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))