Example #1
    # Assumes TensorFlow 1.x (`import tensorflow as tf`, with tf.contrib available), a
    # project-local `layers` module providing dynamic_origin_bilstm_layer /
    # dynamic_origin_lstm_layer, and a `load_file` helper for the pretrained embedding.
    def build_train_proc(self):
        # input layer (batch_size, n_steps, input_dim)
        self.encode_input = tf.placeholder(tf.float32, [None, self.max_n_words, self.input_dim])
        self.encode_sent_len = tf.placeholder(tf.int32, [None])
        self.encode_conv_len = tf.placeholder(tf.int32, [self.batch_size])
        self.decode_input = tf.placeholder(tf.float32, [None, self.max_r_words, self.input_dim])
        self.decode_sent_len = tf.placeholder(tf.int32, [None])
        self.is_training = tf.placeholder(tf.bool)
        self.reward = tf.placeholder(tf.float32, [None])
        self.ans_vec = tf.placeholder(tf.float32, [None, self.max_r_words, self.input_dim])
        self.y = tf.placeholder(tf.int32, [None, self.max_r_words])
        self.y_mask = tf.placeholder(tf.float32, [None, self.max_r_words])

        self.encode_input = tf.contrib.layers.dropout(self.encode_input, self.dropout_prob, is_training=self.is_training)
        self.decode_input = tf.contrib.layers.dropout(self.decode_input, self.dropout_prob, is_training=self.is_training)

        sent_outputs, sent_state = layers.dynamic_origin_bilstm_layer(self.encode_input, self.word_lstm_dim, scope_name='sent_level_bilstm_rnn', input_len=self.encode_sent_len)
        sent_last_state = tf.concat([sent_state[0][1], sent_state[1][1]], axis=1)
        # sent_outputs = tf.reshape(sent_outputs, shape=[self.batch_size, self.max_n_sentences, self.max_n_words, self.lstm_dim])
        # ind = tf.stack([tf.range(self.batch_size), self.encode_conv_len - 1], axis=1)
        # sent_last_outputs = tf.gather_nd(sent_outputs,indices=ind)

        conv_sents = tf.reshape(sent_last_state, shape=[self.batch_size, self.max_n_sentences, self.lstm_dim])
        # self.sent_last_state_trun = tf.gather_nd(conv_sents, indices=ind)
        conv_outputs, conv_state = layers.dynamic_origin_lstm_layer(conv_sents, self.lstm_dim, 'conv_level_rnn', input_len=self.encode_conv_len)
        self.conv_last_state = conv_state[1]

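        # Latent perturbation of the conversation state: two linear maps produce a mean and a
        # softplus scale, and a reparameterization-style sample (mean + eps * scale) is added
        # to conv_last_state further below.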
        with tf.variable_scope('normal'):
            normal_first_W = tf.get_variable('normal_first_W', shape=[self.conv_last_state.shape[1], self.decode_dim], dtype=tf.float32,
                                              initializer=tf.contrib.layers.xavier_initializer())
            normal_first_b = tf.get_variable('normal_first_b', shape=[self.decode_dim], dtype=tf.float32,
                                              initializer=tf.contrib.layers.xavier_initializer())
            normal_second_W = tf.get_variable('normal_second_W', shape=[self.decode_dim, self.decode_dim], dtype=tf.float32,
                                              initializer=tf.contrib.layers.xavier_initializer())
            normal_second_b = tf.get_variable('normal_second_b', shape=[self.decode_dim], dtype=tf.float32,
                                              initializer=tf.contrib.layers.xavier_initializer())
            normal_mean_W = tf.get_variable('normal_mean_W', shape=[self.decode_dim, self.lstm_dim], dtype=tf.float32,
                                              initializer=tf.contrib.layers.xavier_initializer())
            normal_mean_b = tf.get_variable('normal_mean_b', shape=[self.lstm_dim], dtype=tf.float32,
                                              initializer=tf.contrib.layers.xavier_initializer())
            normal_cov_W = tf.get_variable('normal_cov_W', shape=[self.decode_dim, self.lstm_dim], dtype=tf.float32,
                                            initializer=tf.contrib.layers.xavier_initializer())
            normal_cov_b = tf.get_variable('normal_cov_b', shape=[self.lstm_dim], dtype=tf.float32,
                                            initializer=tf.contrib.layers.xavier_initializer())

            normal_first = tf.matmul(self.conv_last_state, normal_first_W) + normal_first_b
            normal_second = tf.matmul(normal_first, normal_second_W) + normal_second_b
            normal_mean = tf.matmul(normal_second, normal_mean_W) + normal_mean_b
            normal_cov = tf.nn.softplus(tf.matmul(normal_second, normal_cov_W) + normal_cov_b)
            normal_sample = normal_mean + tf.multiply(tf.random_normal(shape=[self.batch_size, self.lstm_dim]), normal_cov)

        self.conv_last_state = self.conv_last_state + normal_sample

        # decoder

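        # Project the (perturbed) conversation state into decode_dim to initialize the decoder.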
        with tf.variable_scope('linear'):
            decoder_input_W = tf.get_variable('sw', shape=[self.conv_last_state.shape[1], self.decode_dim], dtype=tf.float32,
                                              initializer=tf.contrib.layers.xavier_initializer())
            decoder_input_b = tf.get_variable('sb', shape=[self.decode_dim], dtype=tf.float32,
                                              initializer=tf.contrib.layers.xavier_initializer())

            self.decoder_input = tf.matmul(self.conv_last_state, decoder_input_W) + decoder_input_b


        # answer->word predict
        self.embed_word_W = tf.get_variable('embed_word_W', shape=[self.decode_dim, self.n_words], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.embed_word_b = tf.get_variable('embed_word_b', shape=[self.n_words], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())


        # # word dim -> decode_dim
        # self.word_to_lstm_w = tf.get_variable('word_to_lstm_W', shape=[self.input_dim, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        # self.word_to_lstm_b = tf.get_variable('word_to_lstm_b', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

        # decoder
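        # GRU-style reset/update/candidate weights; each maps the concatenation of two
        # decode_dim-sized vectors (presumably the previous decoder state and the current
        # input, assembled in generate_answer_on_training) back down to decode_dim.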
        with tf.variable_scope('decoder'):
            self.decoder_r = tf.get_variable('decoder_r', shape=[self.decode_dim * 2, self.decode_dim], dtype=tf.float32,
                                             initializer=tf.contrib.layers.xavier_initializer())
            self.decoder_z = tf.get_variable('decoder_z', shape=[self.decode_dim * 2, self.decode_dim], dtype=tf.float32,
                                             initializer=tf.contrib.layers.xavier_initializer())
            self.decoder_w = tf.get_variable('decoder_w', shape=[self.decode_dim * 2, self.decode_dim], dtype=tf.float32,
                                             initializer=tf.contrib.layers.xavier_initializer())

        # embedding layer
        embedding = load_file(self.params['embedding'])
        self.Wemb = tf.constant(embedding, dtype=tf.float32)

        # generate training
        answer_train, train_loss, distribution_train = self.generate_answer_on_training()
        answer_test, test_loss, distribution_test = self.generate_answer_on_testing()

        # final
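        # L2 penalty over all trainable variables, scaled by regularization_beta and added to
        # both the training and test losses.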
        variables = tf.trainable_variables()
        regularization_cost = tf.reduce_sum([tf.nn.l2_loss(v) for v in variables])
        self.answer_word_train = answer_train
        self.train_loss = train_loss + self.regularization_beta * regularization_cost
        self.distribution_word_train = distribution_train

        self.answer_word_test = answer_test
        self.test_loss = test_loss + self.regularization_beta * regularization_cost
        self.distribution_word_test = distribution_test

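        # Staircase exponential learning-rate decay; Adam minimizes the regularized training loss.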
        self.global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
        learning_rates = tf.train.exponential_decay(self.params['learning_rate'], self.global_step, decay_steps=self.params['lr_decay_n_iters'],
                                                    decay_rate=self.params['lr_decay_rate'], staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rates)
        self.train_proc = optimizer.minimize(self.train_loss, global_step=self.global_step)
Example #2
    def build_train_proc(self):
        # input layer (batch_size, n_steps, input_dim)
        self.encode_input = tf.placeholder(tf.float32, [None, self.max_n_words, self.input_dim])
        self.encode_sent_len = tf.placeholder(tf.int32, [None])
        self.encode_conv_len = tf.placeholder(tf.int32, [self.batch_size])
        self.decode_input = tf.placeholder(tf.float32, [None, self.max_r_words, self.input_dim])
        self.decode_sent_len = tf.placeholder(tf.int32, [None])
        self.is_training = tf.placeholder(tf.bool)
        self.reward = tf.placeholder(tf.float32, [None])
        self.ans_vec = tf.placeholder(tf.float32, [None, self.max_r_words, self.input_dim])
        self.y = tf.placeholder(tf.int32, [None, self.max_r_words])
        self.y_mask = tf.placeholder(tf.float32, [None, self.max_r_words])
        # self.batch_size = tf.placeholder(tf.int32, [])

        self.encode_input = tf.contrib.layers.dropout(self.encode_input, self.dropout_prob, is_training=self.is_training)
        self.decode_input = tf.contrib.layers.dropout(self.decode_input, self.dropout_prob, is_training=self.is_training)

        sent_outputs, sent_state = layers.dynamic_origin_bilstm_layer(self.encode_input, self.word_lstm_dim, scope_name='sent_level_bilstm_rnn', input_len=self.encode_sent_len)
        sent_last_state = tf.concat([sent_state[0][1], sent_state[1][1]], axis=1)
        # sent_last_state = tf.contrib.layers.dropout(sent_last_state, self.dropout_prob, is_training=self.is_training)
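        # ind holds, for each conversation in the batch, the index of its last valid sentence
        # (encode_conv_len - 1); gather_nd extracts that sentence's word-level outputs here and
        # its sentence vector below (sent_last_state_trun).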
        sent_outputs = tf.reshape(sent_outputs, shape=[self.batch_size, self.max_n_sentences, self.max_n_words, self.lstm_dim])
        ind = tf.stack([tf.range(self.batch_size), self.encode_conv_len - 1], axis=1)
        sent_last_outputs = tf.gather_nd(sent_outputs, indices=ind)

        conv_sents = tf.reshape(sent_last_state, shape=[self.batch_size, self.max_n_sentences, self.lstm_dim])
        self.sent_last_state_trun = tf.gather_nd(conv_sents, indices=ind)
        conv_outputs, conv_state = layers.dynamic_origin_lstm_layer(conv_sents, self.lstm_dim, 'conv_level_rnn', input_len=self.encode_conv_len)
        self.conv_last_state = conv_state[1]

        self.sent_features = sent_last_outputs
        self.conv_features = conv_outputs

        self.sent_features = tf.contrib.layers.dropout(self.sent_features, self.dropout_prob, is_training=self.is_training)
        self.conv_features = tf.contrib.layers.dropout(self.conv_features, self.dropout_prob, is_training=self.is_training)


        # with tf.variable_scope("ref_var"):
        #     self.Wsi = tf.get_variable('Wsi', shape=[self.input_dim, self.ref_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        #     self.Wsh = tf.get_variable('Wsh', shape=[self.lstm_dim, self.ref_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        #     self.Wsq = tf.get_variable('Wsq', shape=[self.lstm_dim, self.ref_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        #     self.bias = tf.get_variable('bias', shape=[self.ref_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        #     self.Vs = tf.get_variable('Vs', shape=[self.ref_dim, 1], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())

        # def cond(idx, times, cell, sents, state, outputs, important_outputs):
        #     return idx < times
        #
        # def body(idx, times, cell, sents, state, outputs, important_outputs):
        #     idx = idx + 1
        #     sent = tf.reshape(sents[idx, :],shape=[1,self.lstm_dim])
        #     ref = tf.matmul(state[1], self.Wsh) + tf.matmul(sent, self.Wsi) + self.bias
        #     condition = tf.sigmoid(tf.matmul(ref, self.Vs))
        #     prod = tf.squeeze(condition, 1) > 0.3
        #     (cell_output, state) = cell(sent, state)
        #     outputs.append(cell_output)
        #     return idx, times, cell, sents, state, outputs, important_outputs
        #
        #
        #
        # with tf.variable_scope("encode_conv_level"):
        #     for batch in range(self.batch_size):
        #         outputs = list()
        #         important_outputs = list()
        #         times = tf.cast(self.encode_conv_len[batch],tf.int32)
        #         idx = 0
        #         sents = conv_sents[batch]
        #         _, _, _, _, _, outputs, important_outputs = tf.while_loop(cond, body, [idx, times, cell_first, sents, state_first, outputs, important_outputs])




        # decoder

        # self.decoder_cell = tf.contrib.rnn.GRUCell(self.decode_dim)

        with tf.variable_scope('linear'):
            sent_and_conv_last = tf.concat([self.sent_last_state_trun, self.conv_last_state], axis=1)
            decoder_input_W = tf.get_variable('sw', shape=[self.sent_last_state_trun.shape[1] + self.conv_last_state.shape[1], self.decode_dim], dtype=tf.float32,
                                              initializer=tf.contrib.layers.xavier_initializer())
            decoder_input_b = tf.get_variable('sb', shape=[self.decode_dim], dtype=tf.float32,
                                              initializer=tf.contrib.layers.xavier_initializer())

            self.decoder_input = tf.matmul(sent_and_conv_last, decoder_input_W) + decoder_input_b


        # answer->word predict
        self.embed_word_W = tf.get_variable('embed_word_W', shape=[self.decode_dim, self.n_words], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        self.embed_word_b = tf.get_variable('embed_word_b', shape=[self.n_words], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())


        # # word dim -> decode_dim
        # self.word_to_lstm_w = tf.get_variable('word_to_lstm_W', shape=[self.input_dim, self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
        # self.word_to_lstm_b = tf.get_variable('word_to_lstm_b', shape=[self.decode_dim], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())


        # decoder attention layer
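        # Additive-attention parameters (consumed in generate_answer_*): attention_w_x projects
        # the lstm_dim memory entries, attention_w_h projects the decode_dim decoder state,
        # attention_a reduces the combined score to a scalar per entry, and attention_to_decoder
        # maps the attended lstm_dim context into the decoder dimension.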
        with tf.variable_scope('decoder_attention'):
            self.attention_w_x = tf.get_variable('attention_w_x', shape=[self.lstm_dim, self.attention_dim], dtype=tf.float32,
                                                 initializer=tf.contrib.layers.xavier_initializer())
            self.attention_w_h = tf.get_variable('attention_w_h', shape=[self.decode_dim, self.attention_dim], dtype=tf.float32,
                                                 initializer=tf.contrib.layers.xavier_initializer())
            self.attention_b = tf.get_variable('attention_b', shape=[self.attention_dim], dtype=tf.float32,
                                               initializer=tf.contrib.layers.xavier_initializer())
            self.attention_a = tf.get_variable('attention_a', shape=[self.attention_dim, 1], dtype=tf.float32,
                                               initializer=tf.contrib.layers.xavier_initializer())
            self.attention_to_decoder = tf.get_variable('attention_to_decoder', shape=[self.lstm_dim, self.decode_dim], dtype=tf.float32,
                                                        initializer=tf.contrib.layers.xavier_initializer())
        # decoder
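        # GRU-style gate weights; each now takes four decode_dim-sized inputs concatenated
        # (presumably the previous state, the word input, and attention contexts).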
        with tf.variable_scope('decoder'):
            self.decoder_r = tf.get_variable('decoder_r', shape=[self.decode_dim * 4, self.decode_dim], dtype=tf.float32,
                                             initializer=tf.contrib.layers.xavier_initializer())
            self.decoder_z = tf.get_variable('decoder_z', shape=[self.decode_dim * 4, self.decode_dim], dtype=tf.float32,
                                             initializer=tf.contrib.layers.xavier_initializer())
            self.decoder_w = tf.get_variable('decoder_w', shape=[self.decode_dim * 4, self.decode_dim], dtype=tf.float32,
                                             initializer=tf.contrib.layers.xavier_initializer())

        # embedding layer
        embedding = load_file(self.params['embedding'])
        self.Wemb = tf.constant(embedding, dtype=tf.float32)

        # generate training
        answer_train, train_loss, distribution_train = self.generate_answer_on_training()
        answer_test, test_loss, distribution_test = self.generate_answer_on_testing()

        # final
        variables = tf.trainable_variables()
        regularization_cost = tf.reduce_sum([tf.nn.l2_loss(v) for v in variables])
        self.answer_word_train = answer_train
        self.train_loss = train_loss + self.regularization_beta * regularization_cost
        self.distribution_word_train = distribution_train

        self.answer_word_test = answer_test
        self.test_loss = test_loss + self.regularization_beta * regularization_cost
        self.distribution_word_test = distribution_test

        self.global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
        learning_rates = tf.train.exponential_decay(self.params['learning_rate'], self.global_step, decay_steps=self.params['lr_decay_n_iters'],
                                                    decay_rate=self.params['lr_decay_rate'], staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rates)
        self.train_proc = optimizer.minimize(self.train_loss, global_step=self.global_step)
Example #3
    def build_train_proc(self):
        # input layer (batch_size, n_steps, input_dim)
        self.encode_input = tf.placeholder(
            tf.float32, [None, self.max_n_words, self.input_dim])
        self.encode_sent_len = tf.placeholder(tf.int32, [None])
        self.encode_conv_len = tf.placeholder(tf.int32, [self.batch_size])
        self.decode_input = tf.placeholder(
            tf.float32, [None, self.max_r_words, self.input_dim])
        self.decode_sent_len = tf.placeholder(tf.int32, [None])
        self.is_training = tf.placeholder(tf.bool)
        self.reward = tf.placeholder(tf.float32, [None])
        self.ans_vec = tf.placeholder(
            tf.float32, [None, self.max_r_words - 1, self.input_dim])
        self.y = tf.placeholder(tf.int32, [None, self.max_r_words])
        self.y_mask = tf.placeholder(tf.float32, [None, self.max_r_words])
        # self.batch_size = tf.placeholder(tf.int32, [])

        # self.encode_input = tf.contrib.layers.dropout(self.encode_input, self.dropout_prob, is_training=self.is_training)
        # self.decode_input = tf.contrib.layers.dropout(self.decode_input, self.dropout_prob, is_training=self.is_training)

        sent_outputs, sent_state = layers.dynamic_origin_bilstm_layer(
            self.encode_input,
            self.word_lstm_dim,
            scope_name='sent_level_bilstm_rnn',
            input_len=self.encode_sent_len)
        sent_last_state = tf.concat([sent_state[0][1], sent_state[1][1]],
                                    axis=1)
        # sent_last_state = tf.contrib.layers.dropout(sent_last_state, self.dropout_prob, is_training=self.is_training)
        sent_outputs = tf.reshape(sent_outputs,
                                  shape=[
                                      self.batch_size, self.max_n_sentences,
                                      self.max_n_words, self.lstm_dim
                                  ])
        ind = tf.stack([tf.range(self.batch_size), self.encode_conv_len - 1],
                       axis=1)
        sent_last_outputs = tf.gather_nd(sent_outputs, indices=ind)

        conv_sents = tf.reshape(
            sent_last_state,
            shape=[self.batch_size, self.max_n_sentences, self.lstm_dim])
        self.sent_last_state_trun = tf.gather_nd(conv_sents, indices=ind)
        conv_outputs, conv_state = layers.dynamic_origin_lstm_layer(
            conv_sents,
            self.lstm_dim,
            'conv_level_rnn',
            input_len=self.encode_conv_len)
        self.conv_last_state = conv_state[1]

        self.sent_outputs = sent_outputs
        self.ind = ind
        self.sent_features = sent_last_outputs
        self.conv_features = conv_outputs

        # decoder

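        # Decoder LSTM cell; state_is_tuple keeps the (c, h) state as a tuple.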
        self.decode_cell = tf.contrib.rnn.BasicLSTMCell(self.decode_dim,
                                                        state_is_tuple=True)

        with tf.variable_scope('linear'):
            sent_and_conv_last = tf.concat(
                [self.sent_last_state_trun, self.conv_last_state], axis=1)
            decoder_input_W = tf.get_variable(
                'sw',
                shape=[
                    self.sent_last_state_trun.shape[1] +
                    self.conv_last_state.shape[1], self.decode_dim
                ],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            decoder_input_b = tf.get_variable(
                'sb',
                shape=[self.decode_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())

            self.decoder_input = tf.matmul(sent_and_conv_last,
                                           decoder_input_W) + decoder_input_b

        # answer->word predict
        self.embed_word_W = tf.get_variable(
            'embed_word_W',
            shape=[self.decode_dim, self.n_words],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        self.embed_word_b = tf.get_variable(
            'embed_word_b',
            shape=[self.n_words],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())

        # word dim -> decode_dim
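        # Projects input_dim word vectors (e.g. ans_vec) into decode_dim; presumably used to
        # feed ground-truth words to the decoder during training.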
        self.word_to_lstm_w = tf.get_variable(
            'word_to_lstm_W',
            shape=[self.input_dim, self.decode_dim],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        self.word_to_lstm_b = tf.get_variable(
            'word_to_lstm_b',
            shape=[self.decode_dim],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())

        # decoder attention layer
        with tf.variable_scope('decoder_attention'):
            self.attention_w_x = tf.get_variable(
                'attention_w_x',
                shape=[self.lstm_dim, self.attention_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            self.attention_w_h = tf.get_variable(
                'attention_w_h',
                shape=[self.decode_dim, self.attention_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            self.attention_b = tf.get_variable(
                'attention_b',
                shape=[self.attention_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            self.attention_a = tf.get_variable(
                'attention_a',
                shape=[self.attention_dim, 1],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            self.attention_to_decoder = tf.get_variable(
                'attention_to_decoder',
                shape=[self.lstm_dim, self.decode_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
        # decoder
        with tf.variable_scope('decoder'):
            self.decoder_r = tf.get_variable(
                'decoder_r',
                shape=[self.decode_dim * 3, self.decode_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            self.decoder_z = tf.get_variable(
                'decoder_z',
                shape=[self.decode_dim * 3, self.decode_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            self.decoder_w = tf.get_variable(
                'decoder_w',
                shape=[self.decode_dim * 3, self.decode_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())

        # embedding layer
        embedding = load_file(self.params['embedding'])
        self.Wemb = tf.constant(embedding, dtype=tf.float32)

        # generate training
        answer_train, train_loss = self.generate_answer_on_training()
        answer_test, test_loss = self.generate_answer_on_testing()

        # final
        variables = tf.trainable_variables()
        regularization_cost = tf.reduce_sum(
            [tf.nn.l2_loss(v) for v in variables])
        self.answer_word_train = answer_train
        self.train_loss = train_loss + self.regularization_beta * regularization_cost

        self.answer_word_test = answer_test
        self.test_loss = test_loss + self.regularization_beta * regularization_cost

        self.global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        learning_rates = tf.train.exponential_decay(
            self.params['learning_rate'],
            self.global_step,
            decay_steps=self.params['lr_decay_n_iters'],
            decay_rate=self.params['lr_decay_rate'],
            staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rates)
        self.train_proc = optimizer.minimize(self.train_loss,
                                             global_step=self.global_step)
Example #4
    def build_train_proc(self):
        # input layer (batch_size, n_steps, input_dim)
        self.encode_input = tf.placeholder(
            tf.float32, [None, self.max_n_words, self.input_dim])
        self.encode_sent_len = tf.placeholder(tf.int32, [None])
        self.encode_conv_len = tf.placeholder(tf.int32, [self.batch_size])
        self.is_training = tf.placeholder(tf.bool)
        self.reward = tf.placeholder(tf.float32, [None])

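        # This variant decodes in two passes: a "forward" pass over the first max_r_f_words
        # tokens and an "entire" pass over the full max_r_words answer; each pass has its own
        # answer-vector, target, and mask placeholders.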
        self.ans_vec_forward = tf.placeholder(
            tf.float32, [None, self.max_r_f_words, self.input_dim])
        self.y_forward = tf.placeholder(tf.int32, [None, self.max_r_f_words])
        self.y_mask_forward = tf.placeholder(tf.float32,
                                             [None, self.max_r_f_words])

        self.ans_vec_entire = tf.placeholder(
            tf.float32, [None, self.max_r_words, self.input_dim])
        self.y_entire = tf.placeholder(tf.int32, [None, self.max_r_words])
        self.y_mask_entire = tf.placeholder(tf.float32,
                                            [None, self.max_r_words])

        self.y_forward_generation = tf.placeholder(tf.int32,
                                                   [None, self.max_r_words])
        # self.batch_size = tf.placeholder(tf.int32, [])

        self.encode_input = tf.contrib.layers.dropout(
            self.encode_input, self.dropout_prob, is_training=self.is_training)

        sent_outputs, sent_state = layers.dynamic_origin_bilstm_layer(
            self.encode_input,
            self.word_lstm_dim,
            scope_name='sent_level_bilstm_rnn',
            input_len=self.encode_sent_len)
        sent_last_state = tf.concat([sent_state[0][1], sent_state[1][1]],
                                    axis=1)
        # sent_last_state = tf.contrib.layers.dropout(sent_last_state, self.dropout_prob, is_training=self.is_training)
        sent_outputs = tf.reshape(sent_outputs,
                                  shape=[
                                      self.batch_size, self.max_n_sentences,
                                      self.max_n_words, self.lstm_dim
                                  ])
        ind = tf.stack([tf.range(self.batch_size), self.encode_conv_len - 1],
                       axis=1)
        sent_last_outputs = tf.gather_nd(sent_outputs, indices=ind)

        conv_sents = tf.reshape(
            sent_last_state,
            shape=[self.batch_size, self.max_n_sentences, self.lstm_dim])
        self.sent_last_state_trun = tf.gather_nd(conv_sents, indices=ind)
        conv_outputs, conv_state = layers.dynamic_origin_lstm_layer(
            conv_sents,
            self.lstm_dim,
            'conv_level_rnn',
            input_len=self.encode_conv_len)
        self.conv_last_state = conv_state[1]

        self.sent_features = sent_last_outputs
        self.conv_features = conv_outputs

        self.sent_features = tf.contrib.layers.dropout(
            self.sent_features,
            self.dropout_prob,
            is_training=self.is_training)
        self.conv_features = tf.contrib.layers.dropout(
            self.conv_features,
            self.dropout_prob,
            is_training=self.is_training)

        # decoder

        with tf.variable_scope('linear'):
            sent_and_conv_last = tf.concat(
                [self.sent_last_state_trun, self.conv_last_state], axis=1)
            decoder_input_W = tf.get_variable(
                'sw',
                shape=[
                    self.sent_last_state_trun.shape[1] +
                    self.conv_last_state.shape[1], self.decode_dim
                ],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())
            decoder_input_b = tf.get_variable(
                'sb',
                shape=[self.decode_dim],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer())

            self.decoder_input = tf.matmul(sent_and_conv_last,
                                           decoder_input_W) + decoder_input_b

        self.define_var()

        # embedding layer
        embedding = load_file(self.params['embedding'])
        self.Wemb = tf.constant(embedding, dtype=tf.float32)

        # generate training
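        # Four generation graphs: forward and entire answers, each with a training and a
        # testing variant, each returning (answer, loss, distribution).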
        forward_answer_train, forward_train_loss, forward_distribution_train = self.generate_forward_answer_on_training(
        )
        entire_answer_train, entire_train_loss, entire_distribution_train = self.generate_entire_answer_on_training(
        )
        forward_answer_test, forward_test_loss, forward_distribution_test = self.generate_forward_answer_on_testing(
        )
        entire_answer_test, entire_test_loss, entire_distribution_test = self.generate_entire_answer_on_testing(
        )

        # final
        variables = tf.trainable_variables()
        regularization_cost = tf.reduce_sum(
            [tf.nn.l2_loss(v) for v in variables])
        self.forward_answer_word_train = forward_answer_train
        self.answer_word_train = entire_answer_train
        self.forward_train_loss = forward_train_loss + self.regularization_beta * regularization_cost
        self.train_loss = entire_train_loss + self.regularization_beta * regularization_cost
        self.distribution_word_train = entire_distribution_train

        self.forward_answer_word_test = forward_answer_test
        self.answer_word_test = entire_answer_test
        self.test_loss = entire_test_loss
        self.distribution_word_test = entire_distribution_test

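        # Separate global steps and Adam optimizers for the forward and the entire training
        # losses, each with its own exponential-decay schedule.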
        self.forward_global_step = tf.get_variable(
            'forward_global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        self.global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        forward_learning_rates = tf.train.exponential_decay(
            self.params['learning_rate'],
            self.forward_global_step,
            decay_steps=self.params['lr_decay_n_iters'],
            decay_rate=self.params['lr_decay_rate'],
            staircase=True)
        learning_rates = tf.train.exponential_decay(
            self.params['learning_rate'],
            self.global_step,
            decay_steps=self.params['lr_decay_n_iters'],
            decay_rate=self.params['lr_decay_rate'],
            staircase=True)
        forward_optimizer = tf.train.AdamOptimizer(forward_learning_rates)
        optimizer = tf.train.AdamOptimizer(learning_rates)
        self.forward_train_proc = forward_optimizer.minimize(
            self.forward_train_loss, global_step=self.forward_global_step)
        self.train_proc = optimizer.minimize(self.train_loss,
                                             global_step=self.global_step)