Example 1
    def train(self):
        self.max_acc = 1
        self.is_training = True
        with tf.Graph().as_default():
            data_processor = DataProcessor()

            vocab_size = data_processor.get_vocabulary_size(FLAGS.vocab_path)
            vocab, revocab = DataProcessor.initialize_vocabulary(
                FLAGS.vocab_path)
            data_processor.get_init(FLAGS.input_training_data_path,
                                    FLAGS.input_validation_data_path, vocab,
                                    vocab_size, FLAGS.max_length, revocab)
            models = AttentionModel()

            input_q = tf.placeholder(tf.int32,
                                     shape=(None, FLAGS.max_length),
                                     name="input_x1")  # batch size left dynamic (None) instead of FLAGS.train_batch_size
            input_ap = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            input_an = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            # input_k = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            # input_v = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            q_encode = models.embed(
                inputs=input_q,
                vocab_size=vocab_size + 1,
                num_units=hp.hidden_units)  # vocabulary size plus 1 for the padding token
            ap_encode = models.embed(inputs=input_ap,
                                     vocab_size=vocab_size + 1,
                                     num_units=hp.hidden_units)
            an_encode = models.embed(inputs=input_an,
                                     vocab_size=vocab_size + 1,
                                     num_units=hp.hidden_units)
            # k_encode = models.embed(input_k, vocab_size=vocab_size,num_units=hp.hidden_units)
            # v_encode = models.embed(input_v, vocab_size=vocab_size,num_units=hp.hidden_units)
            # apply dropout
            q_encode = tf.layers.dropout(q_encode,
                                         hp.dropout_rate,
                                         training=tf.convert_to_tensor(
                                             self.is_training))
            ap_encode = tf.layers.dropout(ap_encode,
                                          hp.dropout_rate,
                                          training=tf.convert_to_tensor(
                                              self.is_training))
            an_encode = tf.layers.dropout(an_encode,
                                          hp.dropout_rate,
                                          training=tf.convert_to_tensor(
                                              self.is_training))
            # k_encode = tf.layers.dropout(k_encode, hp.dropout_rate, training=tf.convert_to_tensor(self.is_training))
            # v_encode = tf.layers.dropout(v_encode, hp.dropout_rate, training=tf.convert_to_tensor(self.is_training))
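            # Note: tf.convert_to_tensor(self.is_training) freezes the Python
            # bool into the graph at construction time, so toggling
            # self.is_training later (as done around evaluate() below) does not
            # actually switch dropout off. A hedged alternative sketch would
            # feed a boolean placeholder instead, e.g.:
            #   is_training_ph = tf.placeholder_with_default(True, shape=(),
            #                                                name="is_training")
            #   q_encode = tf.layers.dropout(q_encode, hp.dropout_rate,
            #                                training=is_training_ph)
            # and pass {is_training_ph: False} in the evaluation feed_dict.
            # The placeholder name is illustrative, not part of the original code.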

            # multihead blocks
            for i in range(hp.num_blocks):
                with tf.variable_scope("num_blocks_{}".format(i)):
                    q_encode = models.multihead_attention(
                        query=q_encode,
                        key=q_encode,
                        value=q_encode,
                        num_heads=hp.num_heads,
                        is_training=tf.convert_to_tensor(self.is_training),
                        dropout_rate=hp.dropout_rate,
                        mask_future=False)
                    q_encode = models.feed_forward(
                        q_encode, units=[hp.hidden_units * 4, hp.hidden_units])
                    ap_encode = models.multihead_attention(
                        query=ap_encode,
                        key=ap_encode,
                        value=ap_encode,
                        num_heads=hp.num_heads,
                        is_training=tf.convert_to_tensor(self.is_training),
                        dropout_rate=hp.dropout_rate,
                        mask_future=False)
                    ap_encode = models.feed_forward(
                        ap_encode,
                        units=[hp.hidden_units * 4, hp.hidden_units])
                    an_encode = models.multihead_attention(
                        query=an_encode,
                        key=an_encode,
                        value=an_encode,
                        num_heads=hp.num_heads,
                        is_training=tf.convert_to_tensor(self.is_training),
                        dropout_rate=hp.dropout_rate,
                        mask_future=False)
                    an_encode = models.feed_forward(
                        an_encode,
                        units=[hp.hidden_units * 4, hp.hidden_units])

            ## output layer
            with tf.name_scope('output_layer'):
                dims = q_encode.get_shape().as_list()
                q_encode = tf.reshape(q_encode, [-1, dims[1] * dims[2]])
                ap_encode = tf.reshape(ap_encode, [-1, dims[1] * dims[2]])
                an_encode = tf.reshape(an_encode, [-1, dims[1] * dims[2]])
                weight = tf.get_variable(
                    'output_weight',
                    [q_encode.get_shape().as_list()[-1], hp.hidden_units])

                q_encode = tf.matmul(q_encode, weight)
                ap_encode = tf.matmul(ap_encode, weight)
                an_encode = tf.matmul(an_encode, weight)

            q_encode = models.vec_normalize(q_encode)
            ap_encode = models.vec_normalize(ap_encode)
            an_encode = models.vec_normalize(an_encode)

            ## calculate similarity and loss
            cos_12 = tf.reduce_sum(tf.multiply(q_encode, ap_encode),
                                   1)  # element-wise multiply then sum: cosine similarity of the normalized vectors
            cos_13 = tf.reduce_sum(tf.multiply(q_encode, an_encode), 1)

            zero = tf.constant(0,
                               shape=[FLAGS.train_batch_size],
                               dtype=tf.float32)
            margin = tf.constant(FLAGS.loss_margin,
                                 shape=[FLAGS.train_batch_size],
                                 dtype=tf.float32)

            losses = tf.maximum(
                zero, tf.subtract(margin, tf.subtract(cos_12, cos_13)))
            loss_sum = tf.reduce_sum(losses)
            loss_avg = tf.div(loss_sum, FLAGS.train_batch_size)
            correct = tf.equal(zero, losses)
            accuracy = tf.reduce_mean(tf.cast(correct, "float"),
                                      name="accuracy")

            global_step = tf.Variable(
                0, name="global_step", trainable=False
            )  # the global step is incremented by one each time train_op runs
            optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
            # optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            grads_and_vars = optimizer.compute_gradients(loss_avg)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            saver = tf.train.Saver(tf.global_variables())

            # session start point
            with tf.Session() as session:
                session.run(tf.local_variables_initializer())
                session.run(tf.global_variables_initializer())
                session.run(tf.tables_initializer())
                # meta_path = FLAGS.output_model_path + '/step6500_loss0.0_trainAcc1.0_evalAcc0.36.meta'
                # model_path = FLAGS.output_model_path + '/step6500_loss0.0_trainAcc1.0_evalAcc0.36'
                # saver = tf.train.import_meta_graph(meta_path)
                # print('graph imported')
                # saver.restore(session, model_path)
                # print('variables restored!')

                # Load pre-trained model
                ckpt = tf.train.get_checkpoint_state(
                    FLAGS.input_previous_model_path)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(session, ckpt.model_checkpoint_path)
                    print("Load Model From ", ckpt.model_checkpoint_path)
                else:
                    print("No model found")

                print("Begin to train model.")
                max_acc = 0
                for step in range(FLAGS.training_steps):
                    train_data_batch = data_processor.next_batch_train_random(
                        FLAGS.train_batch_size)
                    train_q_vec_b, train_q_vec_len_b, train_d_vec_b, train_d_vec_len_b, train_dneg_vec_b, train_dneg_vec_len_b = train_data_batch
                    feed_dict = {
                        input_q: train_q_vec_b,
                        input_ap: train_d_vec_b,
                        input_an: train_dneg_vec_b
                    }
                    _, loss_avg_, accuracy_, step_ = session.run(
                        [train_op, loss_avg, accuracy, global_step],
                        feed_dict=feed_dict)
                    print('=' * 10 + 'step{}, loss_avg = {}, acc={}'.format(
                        step_, loss_avg_, accuracy_))  # average loss over the current batch
                    if step_ % FLAGS.eval_every == 0:
                        print('\n============================> begin to test ')
                        eval_size = FLAGS.validation_size

                        def test_step(input_y1, input_y2, input_y3, label_list,
                                      sess):
                            feed_dict = {
                                input_q: input_y1,
                                input_ap: input_y2,
                                input_an: input_y3
                            }

                            correct_flag = 0
                            cos_12_ = sess.run(cos_12, feed_dict)
                            cos_max = max(cos_12_)
                            index_max = list(cos_12_).index(cos_max)
                            if label_list[index_max] == '1':
                                correct_flag = 1
                            # cos_pos_, cos_neg_, accuracy_ = sess.run([cos_12, cos_13, accuracy], feed_dict)
                            # data_processor.saveFeatures(cos_pos_, cos_neg_, test_loss_, accuracy_)
                            return correct_flag

                        def evaluate(eval_size):
                            correct_num = 0
                            # load the validation batches once, rather than
                            # reloading them on every iteration of the loop below
                            batches = data_processor.loadValData_step(
                                vocab, vocab_size,
                                FLAGS.input_validation_data_path,
                                FLAGS.max_length,
                                eval_size)  # batch_size*seq_len
                            for i in range(eval_size):
                                print('evaluation step %d ' % i)
                                # display/save the test data
                                # save_test_data(batch_y1, batch_y2, label_list)
                                batch_y1, batch_y2, label_list = batches[i]
                                correct_flag = test_step(
                                    batch_y1, batch_y2, batch_y2, label_list,
                                    session)
                                correct_num += correct_flag
                            print('correct_num', correct_num)
                            acc = correct_num / float(eval_size)
                            return acc

                        self.is_training = False
                        acc_ = evaluate(eval_size=eval_size)
                        self.is_training = True
                        print(
                            '--------The test result on the validation set: acc = {0}, test size = {1}----------'
                            .format(acc_, eval_size))
                        if acc_ >= max_acc:
                            max_acc = acc_
                            # # acc = test_for_bilstm.test()
                            path = saver.save(
                                session, FLAGS.output_model_path + '/step' +
                                str(step_) + '_loss' + str(loss_avg_) +
                                '_trainAcc' + str(accuracy_) + '_evalAcc' +
                                str(acc_))
                            saver.export_meta_graph(FLAGS.output_model_path +
                                                    '/meta_' + 'step' +
                                                    str(step_) + '_loss' +
                                                    str(loss_avg_) +
                                                    '_trainAcc' +
                                                    str(accuracy_) +
                                                    '_evalAcc' + str(acc_))
                            print("Save checkpoint(model) to {}".format(path))
Example 2
    def eval(self):
        self.max_acc = 1
        self.is_training = False
        with tf.Graph().as_default():
            data_processor = DataProcessor()

            vocab_size = data_processor.get_vocabulary_size(FLAGS.vocab_path)
            vocab, revocab = DataProcessor.initialize_vocabulary(
                FLAGS.vocab_path)
            data_processor.get_init(FLAGS.input_training_data_path,
                                    FLAGS.input_validation_data_path, vocab,
                                    vocab_size, FLAGS.max_length, revocab)
            models = AttentionModel()

            input_q = tf.placeholder(tf.int32,
                                     shape=(None, FLAGS.max_length),
                                     name="input_x1")  # batch size left dynamic (None) instead of FLAGS.train_batch_size
            input_ap = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            input_an = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
            q_encode = models.embed(
                inputs=input_q,
                vocab_size=vocab_size + 1,
                num_units=hp.hidden_units)  # vocabulary size plus 1 for the padding token
            ap_encode = models.embed(inputs=input_ap,
                                     vocab_size=vocab_size + 1,
                                     num_units=hp.hidden_units)
            an_encode = models.embed(inputs=input_an,
                                     vocab_size=vocab_size + 1,
                                     num_units=hp.hidden_units)

            # multihead blocks
            for i in range(hp.num_blocks):
                with tf.variable_scope("num_blocks_{}".format(i)):
                    q_encode = models.multihead_attention(
                        query=q_encode,
                        key=q_encode,
                        value=q_encode,
                        num_heads=hp.num_heads,
                        mask_future=False)
                    q_encode = models.feed_forward(
                        q_encode, units=[hp.hidden_units * 4, hp.hidden_units])
                    ap_encode = models.multihead_attention(
                        query=ap_encode,
                        key=ap_encode,
                        value=ap_encode,
                        num_heads=hp.num_heads,
                        mask_future=False)
                    ap_encode = models.feed_forward(
                        ap_encode,
                        units=[hp.hidden_units * 4, hp.hidden_units])
                    an_encode = models.multihead_attention(
                        query=an_encode,
                        key=an_encode,
                        value=an_encode,
                        num_heads=hp.num_heads,
                        mask_future=False)
                    an_encode = models.feed_forward(
                        an_encode,
                        units=[hp.hidden_units * 4, hp.hidden_units])

            ## output layer
            with tf.name_scope('output_layer'):
                dims = q_encode.get_shape().as_list()
                q_encode = tf.reshape(q_encode, [-1, dims[1] * dims[2]])
                ap_encode = tf.reshape(ap_encode, [-1, dims[1] * dims[2]])
                an_encode = tf.reshape(an_encode, [-1, dims[1] * dims[2]])
                weight = tf.get_variable(
                    'output_weight',
                    [q_encode.get_shape().as_list()[-1], hp.hidden_units])

                q_encode = tf.matmul(q_encode, weight)
                ap_encode = tf.matmul(ap_encode, weight)
                an_encode = tf.matmul(an_encode, weight)

            q_encode = models.vec_normalize(q_encode)
            ap_encode = models.vec_normalize(ap_encode)
            an_encode = models.vec_normalize(an_encode)

            ## calculate similarity and loss
            cos_12 = tf.reduce_sum(tf.multiply(q_encode, ap_encode),
                                   1)  # element-wise multiply then sum: cosine similarity of the normalized vectors
            cos_13 = tf.reduce_sum(tf.multiply(q_encode, an_encode), 1)

            zero = tf.constant(0,
                               shape=[FLAGS.train_batch_size],
                               dtype=tf.float32)
            margin = tf.constant(FLAGS.loss_margin,
                                 shape=[FLAGS.train_batch_size],
                                 dtype=tf.float32)

            losses = tf.maximum(
                zero, tf.subtract(margin, tf.subtract(cos_12, cos_13)))
            loss_sum = tf.reduce_sum(losses)
            loss_avg = tf.div(loss_sum, FLAGS.train_batch_size)
            correct = tf.equal(zero, losses)
            accuracy = tf.reduce_mean(tf.cast(correct, "float"),
                                      name="accuracy")

            global_step = tf.Variable(
                0, name="global_step", trainable=False
            )  # the global step is incremented by one each time train_op runs
            optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
            # optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            grads_and_vars = optimizer.compute_gradients(loss_avg)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            saver = tf.train.Saver(tf.global_variables())

            # session start point
            with tf.Session() as session:
                session.run(tf.local_variables_initializer())
                session.run(tf.global_variables_initializer())
                session.run(tf.tables_initializer())

                # Load pre-trained model
                ckpt = tf.train.get_checkpoint_state(
                    FLAGS.input_previous_model_path)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(session, ckpt.model_checkpoint_path)
                    print("Load Model From ", ckpt.model_checkpoint_path)
                else:
                    print("No model found and exit.")
                    exit()

                print(
                    '\n============================> begin to evaluate model. '
                )
                eval_size = FLAGS.evaluation_size

                def evaluate_all():
                    # evaluate over the entire validation file (defined here but
                    # not called in this example; evaluate() below is used)
                    correct_num = 0
                    batches = data_processor.loadValData_step(
                        vocab,
                        vocab_size,
                        FLAGS.input_validation_data_path,
                        FLAGS.max_length,
                        eval_size=0)  # batch_size*seq_len
                    for i in range(eval_size):
                        # display/save the test data
                        # save_test_data(batch_y1, batch_y2, label_list)
                        batch_y1, batch_y2, label_list = batches[i]
                        correct_flag = test_step(batch_y1, batch_y2, batch_y2,
                                                 label_list, session)
                        correct_num += correct_flag
                        if correct_flag == 1:
                            print('step %d ==== correct prediction' % i)
                        else:
                            print('step %d ==== wrong prediction' % i)
                    print('correct_num', correct_num)
                    acc = correct_num / float(eval_size)
                    return acc

                def test_step(input_y1, input_y2, input_y3, label_list, sess):
                    feed_dict = {
                        input_q: input_y1,
                        input_ap: input_y2,
                        input_an: input_y3
                    }

                    correct_flag = 0
                    cos_12_ = sess.run(cos_12, feed_dict)
                    cos_max = max(cos_12_)
                    index_max = list(cos_12_).index(cos_max)
                    if label_list[index_max] == '1':
                        correct_flag = 1
                    return correct_flag

                def evaluate(eval_size):
                    correct_num = 0
                    batches = data_processor.loadValData_step(
                        vocab, vocab_size, FLAGS.input_validation_data_path,
                        FLAGS.max_length, eval_size)  # batch_size*seq_len
                    for i in range(eval_size):
                        # display/save the test data
                        # save_test_data(batch_y1, batch_y2, label_list)
                        batch_y1, batch_y2, label_list = batches[i]
                        correct_flag = test_step(batch_y1, batch_y2, batch_y2,
                                                 label_list, session)
                        correct_num += correct_flag
                        if correct_flag == 1:
                            print('step %d ==== correct prediction' % i)
                        else:
                            print('step %d ==== wrong prediction' % i)
                    print('correct_num', correct_num)
                    acc = correct_num / float(eval_size)
                    return acc

                acc_ = evaluate(eval_size=eval_size)
                print(
                    '--------The test result on the validation set: acc = {0}, test size = {1}----------'
                    .format(acc_, eval_size))
                exit()
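
For clarity, a minimal sketch of the candidate-ranking check that test_step performs during evaluation: each validation batch evidently pairs one query with its candidate answers, the candidate with the highest cosine score is selected, and the prediction counts as correct if that candidate is labeled '1'. The helper name and the sample data below are illustrative, not from the original code.

import numpy as np

def rank_top_candidate(cos_scores, label_list):
    # cos_scores: 1-D array of query/candidate cosine similarities
    # label_list: string labels aligned with cos_scores ('1' marks the true answer)
    index_max = int(np.argmax(cos_scores))
    return 1 if label_list[index_max] == '1' else 0

# Example: the second candidate scores highest and carries the '1' label.
print(rank_top_candidate(np.array([0.2, 0.9, 0.4]), ['0', '1', '0']))  # -> 1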