Code Example #1
File: trained_main.py Project: zjhyyzz/GP-Net
def evaluate_span_batch(config, selector, spaner, num_batches, eval_file, sess,
                        data_type, dp):
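    """Run the selector + span pipeline over num_batches eval batches.

    Collects predicted answers, scores them with evaluate(), and returns the
    metrics dict together with tf.Summary protos for TensorBoard.
    """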
    answer_dict = {}
    losses = []
    reward = np.zeros([config.batch_size], np.float32)
    for batch_num in tqdm(range(0, num_batches)):
        tensor_dict, _ = dp.get_train_batch(batch_num, is_test=True)
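        # Stage 1: run the sentence selector to score every candidate sentence.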
        qa_id, yp, outer, para_encode, para_enc_mask, q_emb, sen_emb, lo = sess.run(
            [
                selector.qa_id, selector.yp, selector.outer, selector.att_s,
                selector.c_p_mask, selector.q_emb, selector.c_emb, selector.lo
            ],
            feed_dict={
                selector.qa_id: tensor_dict['ids'],
                selector.q: tensor_dict['ques_idxs'],
                selector.cs: tensor_dict['context_s_idxs'],
                selector.y: tensor_dict['y'],
                selector.ce: tensor_dict['context_s_exist_tag'],
                selector.ct: tensor_dict['context_type_tag'],
                selector.qt: tensor_dict['ques_type_tag'],
                selector.reward: reward
            })
        np.savetxt("lo.txt", lo)

        select_sentence = []
        sentences_len = []
        q = []

        for i in range(config.batch_size):
            ques = np.zeros([config.ques_limit, q_emb.shape[-1]], np.float32)
            ques[:q_emb.shape[-2]] = q_emb[i]
            q.append(ques)
            sentences = []
            sentence_len = []
            # Cumulative sentence offsets and selector scores for example i.
            sent_offsets = tensor_dict['sentence_num'][i]
            indexs = np.argsort(-outer[i])  # sentence indices, best score first
            for j in range(config.k):
                top_index = indexs[j]
                sentence = np.zeros([config.sen_len, sen_emb.shape[-1]],
                                    np.float32)
                sentence[:sen_emb.shape[-2]] = sen_emb[i][top_index]
                sentences.append(sentence)
                sentence_length = np.arange(
                    sent_offsets[indexs[j] - 1],
                    sent_offsets[indexs[j] - 1] + config.sen_len, 1)
                sentence_len.append(sentence_length)
            sentence_len = np.array(sentence_len)
            sentences = np.array(sentences)
            select_sentence.append(sentences)
            sentences_len.append(sentence_len)
        select_sentences = np.array(select_sentence)
        sentences_lens = np.array(sentences_len)
        q = np.array(q)

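        # Stage 2: feed the top-k sentences into the span model (forward only).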
        qa_id, loss, yp1, yp2, topk, ou1, out2, \
        loss1, loss2, \
        lo1, lo2, lo3, lo4 = sess.run(
            [spaner.qa_id, spaner.loss_span, spaner.yp1, spaner.yp2, spaner.topk,
             spaner.out_lo1, spaner.out_lo2,
             spaner.out1, spaner.out2,
             spaner.lo1, spaner.lo2, spaner.lo3, spaner.lo4],
            feed_dict={spaner.qa_id: tensor_dict['ids'],

                       spaner.para: tensor_dict['para_idxs'],
                       spaner.para_e: tensor_dict['para_exist_tag'],
                       spaner.para_t: tensor_dict['para_type_tag'],

                       spaner.q: q,
                       spaner.sentence: select_sentences,
                       spaner.sentence_index: sentences_lens,
                       spaner.outer: outer,
                       spaner.para_enc: para_encode,
                       spaner.para_enc_mask: para_enc_mask,
                       spaner.y1: tensor_dict['y1'],
                       spaner.y2: tensor_dict['y2']
                       })
        answer_dict_, _ = convert_tokens(eval_file, qa_id.tolist(),
                                         yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/loss".format(data_type),
                         simple_value=metrics["loss"]),
    ])
    f1_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/f1".format(data_type),
                         simple_value=metrics["f1"]),
    ])
    em_sum = tf.Summary(value=[
        tf.Summary.Value(tag="{}/em".format(data_type),
                         simple_value=metrics["exact_match"]),
    ])
    return metrics, [loss_sum, f1_sum, em_sum]
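
The loop above ranks sentences by np.argsort(-outer[i]) and keeps the first config.k entries. A minimal, self-contained sketch of that top-k selection pattern (the scores and k below are hypothetical, independent of the GP-Net models):

import numpy as np

def top_k_indices(scores, k):
    """Return the indices of the k highest scores, best first."""
    return np.argsort(-scores)[:k]

scores = np.array([0.1, 0.7, 0.3, 0.9], np.float32)  # hypothetical selector scores
print(top_k_indices(scores, 2))  # -> [3 1]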
Code Example #2
File: trained_main.py Project: zjhyyzz/GP-Net
def test(config):
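    """Restore the trained selector/span checkpoints, run inference over the
    test split, write remapped answers to config.answer_file, and print EM/F1.
    """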
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.dev_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.dev_example, "r") as fh:
        example_file = json.load(fh)
    dp_test = DataProcessor('test', config, is_test=True)
    total = dp_test.get_data_size()
    print("Loading model...")

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        selector = select_model(sess, config, word_mat, char_mat)
        spaner = span_model(sess, config, word_mat, char_mat)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()

        saver.restore(sess,
                      tf.train.latest_checkpoint(config.span_save_dir_trained))
        sess.run(
            tf.assign(selector.is_train, tf.constant(False, dtype=tf.bool)))
        sess.run(tf.assign(spaner.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        for step in tqdm(range(total // config.batch_size)):

            tensor_dict, _ = dp_test.get_train_batch(step, is_test=True)

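            # Score candidate sentences with the selector (forward only).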
            qa_id, yp, outer, para_encode, para_enc_mask, q_emb, sen_emb, lo = sess.run(
                [
                    selector.qa_id, selector.yp, selector.outer,
                    selector.att_s, selector.c_p_mask, selector.q_emb,
                    selector.c_emb, selector.lo
                ],
                feed_dict={
                    selector.qa_id: tensor_dict['ids'],
                    selector.q: tensor_dict['ques_idxs'],
                    selector.cs: tensor_dict['context_s_idxs'],
                    selector.y: tensor_dict['y'],
                    selector.ce: tensor_dict['context_s_exist_tag'],
                    selector.ct: tensor_dict['context_type_tag'],
                    selector.qt: tensor_dict['ques_type_tag']
                })
            select_sentence = []
            sentences_len = []
            q = []
            for i in range(config.batch_size):
                ques = np.zeros([config.ques_limit, q_emb.shape[-1]],
                                np.float32)
                ques[:q_emb.shape[-2]] = q_emb[i]
                q.append(ques)
                sentences = []
                sentence_len = []
                sent_offsets = tensor_dict['sentence_num'][i]
                indexs = np.argsort(-outer[i])  # sentence indices, best score first
                for j in range(config.k):
                    top_index = indexs[j]
                    sentence = np.zeros([config.sen_len, sen_emb.shape[-1]],
                                        np.float32)
                    sentence[:sen_emb.shape[-2]] = sen_emb[i][top_index]
                    sentences.append(sentence)
                    sentence_length = np.arange(
                        sent_offsets[indexs[j] - 1],
                        sent_offsets[indexs[j] - 1] + config.sen_len, 1)
                    sentence_len.append(sentence_length)
                sentence_len = np.array(sentence_len)
                sentences = np.array(sentences)
                select_sentence.append(sentences)
                sentences_len.append(sentence_len)
            select_sentences = np.array(select_sentence)
            sentences_lens = np.array(sentences_len)
            q = np.array(q)
            # Forward-only pass through the span model; no train_op at test time.
            qa_id, loss, yp1, yp2 = sess.run(
                [
                    spaner.qa_id, spaner.loss_span,
                    spaner.yp1, spaner.yp2
                ],
                feed_dict={
                    spaner.qa_id: tensor_dict['ids'],
                    spaner.para: tensor_dict['para_idxs'],
                    spaner.para_e: tensor_dict['para_exist_tag'],
                    spaner.para_t: tensor_dict['para_type_tag'],
                    spaner.q: q,
                    spaner.sentence: select_sentences,
                    spaner.sentence_index: sentences_lens,
                    spaner.outer: outer,
                    spaner.para_enc: para_encode,
                    spaner.para_enc_mask: para_enc_mask,
                    spaner.y1: tensor_dict['y1'],
                    spaner.y2: tensor_dict['y2']
                })

            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                               metrics['f1']))
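
test() follows the standard TF1 build-then-restore pattern: construct the graph, then load weights with tf.train.Saver from the newest checkpoint. A minimal sketch of that restore-and-infer flow, assuming a hypothetical checkpoint directory (the variable w is illustrative only):

import tensorflow as tf  # TF 1.x, as used throughout this project

ckpt_dir = "./span_save_dir_trained"  # hypothetical checkpoint directory

# A tiny graph so the Saver has something to restore into.
w = tf.get_variable("w", shape=[2], initializer=tf.zeros_initializer())

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint(ckpt_dir)  # None if no checkpoint yet
    if ckpt is not None:
        saver.restore(sess, ckpt)  # overwrites w with the saved value
    print(sess.run(w))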
Code Example #3
File: trained_main.py Project: zjhyyzz/GP-Net
def train(config):
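    """Alternately train the span model and the sentence selector.

    The span model is trained on the selector's top-k sentences; the selector
    is then updated with a reward computed from the span model's answers.
    Periodically evaluates on dev, halves the learning rate when dev loss
    stalls, and saves checkpoints to config.RL_save_dir.
    """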
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.train_eval_file, "r") as fh:
        train_eval_file = json.load(fh)
    with open(config.dev_eval_file, "r") as fh:
        dev_eval_file = json.load(fh)

    dp_train = DataProcessor('train', config)
    dp_dev = DataProcessor('dev', config)
    dev_total = dp_dev.get_data_size()

    print("Building model...")

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    loss_save = 100.0
    patience = 0
    lr = config.init_lr
    lr_span = config.init_lr_span
    train_batch_num = int(np.floor(
        dp_train.num_samples / config.batch_size)) - 1
    batch_no = 0
    with tf.Session(config=sess_config) as sess:
        selector = select_model(sess, config, word_mat, char_mat)
        spaner = span_model(sess, config, word_mat, char_mat)
        writer = tf.summary.FileWriter(config.RL_log_dir)
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()
        saver.restore(sess,
                      tf.train.latest_checkpoint(config.span_save_dir_trained))
        sess.run(tf.assign(selector.is_train, tf.constant(True,
                                                          dtype=tf.bool)))
        sess.run(tf.assign(spaner.is_train, tf.constant(True, dtype=tf.bool)))
        sess.run(tf.assign(selector.lr, tf.constant(lr, dtype=tf.float32)))
        sess.run(tf.assign(spaner.lr, tf.constant(lr_span, dtype=tf.float32)))
        for batch_time in tqdm(range(1, config.num_steps + 1)):

            tensor_dict, _ = dp_train.get_train_batch(batch_no %
                                                      train_batch_num)
            sess.run(
                tf.assign(selector.is_train, tf.constant(False,
                                                         dtype=tf.bool)))
            sess.run(
                tf.assign(spaner.is_train, tf.constant(True, dtype=tf.bool)))
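            # Forward pass: score candidate sentences with the frozen selector.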
            qa_id, yp, outer, para_encode, para_enc_mask, q_emb, sen_emb, lo = sess.run(
                [
                    selector.qa_id, selector.yp, selector.outer,
                    selector.att_s, selector.c_p_mask, selector.q_emb,
                    selector.c_emb, selector.lo
                ],
                feed_dict={
                    selector.qa_id: tensor_dict['ids'],
                    selector.q: tensor_dict['ques_idxs'],
                    selector.cs: tensor_dict['context_s_idxs'],
                    selector.y: tensor_dict['y'],
                    selector.ce: tensor_dict['context_s_exist_tag'],
                    selector.ct: tensor_dict['context_type_tag'],
                    selector.qt: tensor_dict['ques_type_tag']
                })
            np.savetxt("lo.txt", lo)

            select_sentence = []
            sentences_len = []
            q = []

            for i in range(config.batch_size):
                ques = np.zeros([config.ques_limit, q_emb.shape[-1]],
                                np.float32)
                ques[:q_emb.shape[-2]] = q_emb[i]
                q.append(ques)
                sentences = []
                sentence_len = []
                sent_offsets = tensor_dict['sentence_num'][i]
                indexs = np.argsort(-outer[i])  # sentence indices, best score first
                # RL alternative: sample k sentences from the selector's policy
                # indexs = np.random.choice(a=outer.shape[0], size=config.k, replace=False, p=outer)

                for j in range(config.k):
                    top_index = indexs[j]
                    sentence = np.zeros([config.sen_len, sen_emb.shape[-1]],
                                        np.float32)
                    sentence[:sen_emb.shape[-2]] = sen_emb[i][top_index]
                    sentences.append(sentence)
                    sentence_length = np.arange(
                        sent_offsets[indexs[j] - 1],
                        sent_offsets[indexs[j] - 1] + config.sen_len, 1)
                    sentence_len.append(sentence_length)
                sentence_len = np.array(sentence_len)
                sentences = np.array(sentences)
                select_sentence.append(sentences)
                sentences_len.append(sentence_len)
            select_sentences = np.array(select_sentence)
            sentences_lens = np.array(sentences_len)
            q = np.array(q)

            global_step = sess.run(spaner.global_step) + 1
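            # Train the span model on the top-k sentences chosen above.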
            qa_id, loss, train_op_span, yp1, yp2, topk, ou1, out2, \
            loss1, loss2, \
            lo1, lo2, lo3, lo4 = sess.run(
                [spaner.qa_id, spaner.loss_span, spaner.train_op, spaner.yp1,
                 spaner.yp2, spaner.topk,
                 spaner.out_lo1, spaner.out_lo2,
                 spaner.out1, spaner.out2,
                 spaner.lo1, spaner.lo2, spaner.lo3, spaner.lo4],
                feed_dict={
                    spaner.qa_id: tensor_dict['ids'],
                    spaner.para: tensor_dict['para_idxs'],
                    spaner.para_e: tensor_dict['para_exist_tag'],
                    spaner.para_t: tensor_dict['para_type_tag'],
                    spaner.q: q,
                    spaner.sentence: select_sentences,
                    spaner.sentence_index: sentences_lens,
                    spaner.outer: outer,
                    spaner.para_enc: para_encode,
                    spaner.para_enc_mask: para_enc_mask,
                    spaner.y1: tensor_dict['y1'],
                    spaner.y2: tensor_dict['y2']
                })
            answer_dict, _ = convert_tokens(train_eval_file, qa_id.tolist(),
                                            yp1.tolist(), yp2.tolist())
            reward = compute_reward(train_eval_file, answer_dict)

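            # Policy-gradient step: update the selector with the reward derived
            # from the span model's answers (span model frozen here).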
            sess.run(
                tf.assign(selector.is_train, tf.constant(True, dtype=tf.bool)))
            sess.run(
                tf.assign(spaner.is_train, tf.constant(False, dtype=tf.bool)))
            qa_id, yp, outer, sel_loss, train_op, policy = sess.run(
                [
                    selector.qa_id, selector.yp, selector.outer, selector.loss,
                    selector.train_op, selector.policy_log_part
                ],
                feed_dict={
                    selector.qa_id: tensor_dict['ids'],
                    selector.q: tensor_dict['ques_idxs'],
                    selector.cs: tensor_dict['context_s_idxs'],
                    selector.y: tensor_dict['y'],
                    selector.ce: tensor_dict['context_s_exist_tag'],
                    selector.ct: tensor_dict['context_type_tag'],
                    selector.qt: tensor_dict['ques_type_tag'],
                    selector.reward: reward
                })
            np.savetxt("topk.txt", topk)
            #            np.savetxt("out1.txt",ou1)
            np.savetxt("l1.txt", lo1)
            np.savetxt("l2.txt", lo2)
            np.savetxt("l3.txt", lo3)
            np.savetxt("l4.txt", lo4)
            np.savetxt("lss1.txt", loss1)

            np.savetxt("lss2.txt", loss2)
            np.savetxt("reward.txt", reward)
            np.savetxt("policy.txt", policy)

            # np.savetxt("loss3.txt", loss3)
            # numpy.savetxt('new.csv', my_matrix, delimiter=',')

            #print(loss3)

            print(loss)
            print("selector_loss" + str(sel_loss))

            batch_no = batch_no + 1
            if global_step % config.period == 0:
                loss_sum = tf.Summary(value=[
                    tf.Summary.Value(tag="model/loss", simple_value=loss),
                ])
                writer.add_summary(loss_sum, global_step)
            if batch_time % config.checkpoint == 0:
                sess.run(
                    tf.assign(selector.is_train,
                              tf.constant(False, dtype=tf.bool)))
                sess.run(
                    tf.assign(spaner.is_train, tf.constant(False,
                                                           dtype=tf.bool)))

                metrics, summ = evaluate_span_batch(
                    config, selector, spaner, dev_total // config.batch_size,
                    dev_eval_file, sess, "dev", dp_dev)
                for s in summ:
                    writer.add_summary(s, global_step)
                print('step ' + str(global_step))
                print('dev_loss: ' + str(metrics["loss"]))
                print('F1: ' + str(metrics["f1"]))
                print('em: ' + str(metrics["exact_match"]))

                sess.run(
                    tf.assign(spaner.is_train, tf.constant(True,
                                                           dtype=tf.bool)))

                dev_loss = metrics["loss"]
                if dev_loss < loss_save:
                    loss_save = dev_loss
                    patience = 0
                else:
                    patience += 1
                if patience >= config.patience:
                    lr /= 2.0
                    loss_save = dev_loss
                    patience = 0
                sess.run(
                    tf.assign(spaner.lr, tf.constant(lr, dtype=tf.float32)))
                writer.flush()
                filename = os.path.join(config.RL_save_dir,
                                        "model_{}.ckpt".format(global_step))
                saver.save(sess, filename)
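
The checkpoint block above implements a simple patience schedule: halve the learning rate whenever dev loss fails to improve for config.patience consecutive evaluations. The same logic, isolated as a plain function (all names below are illustrative, not part of GP-Net):

def update_lr(dev_loss, lr, state, patience_limit):
    """Halve lr after patience_limit evaluations without improvement.

    state is a dict carrying best_loss and patience across calls.
    """
    if dev_loss < state['best_loss']:
        state['best_loss'] = dev_loss
        state['patience'] = 0
    else:
        state['patience'] += 1
    if state['patience'] >= patience_limit:
        lr /= 2.0
        state['best_loss'] = dev_loss
        state['patience'] = 0
    return lr

state = {'best_loss': 100.0, 'patience': 0}
lr = 0.5
for dev_loss in [3.0, 2.5, 2.7, 2.6, 2.4]:  # hypothetical dev losses
    lr = update_lr(dev_loss, lr, state, patience_limit=2)
print(lr)  # 0.25: two stalled evaluations triggered one halving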