def encode_decode(self):
        """Build the actor graph: embed and encode the input sequence, then
        decode a tour with a pointer mechanism, sampling one index per step.

        Side effects: creates TF variables (W_ref, W_q, v, W_1..W_3) and sets
        self.tour, self.log_prob, self.entropies plus two scalar summaries.
        """
        embedded = embed_seq(input_seq=self.input_,
                             from_=self.dimension,
                             to_=self.input_embed,
                             is_training=self.is_training,
                             BN=True,
                             initializer=self.initializer)
        encoded = encode_seq(input_seq=embedded,
                             input_dim=self.input_embed,
                             num_stacks=self.num_stacks,
                             num_heads=self.num_heads,
                             num_neurons=self.num_neurons,
                             is_training=self.is_training)
        # NOTE(review): the `== False` comparison is kept on purpose — if
        # self.is_training were a Tensor, `not`/`is False` would behave
        # differently from `==` here.
        if self.is_training == False:
            # At inference the encoding is tiled across the batch.
            encoded = tf.tile(encoded, [self.batch_size, 1, 1])

        # Per-step decoder outputs: tour indices, log-probs, entropies.
        idx_list = []
        log_probs = []
        entropies = []
        mask = tf.zeros((self.batch_size, self.max_length))  # 1 = already visited

        hidden_dim = encoded.get_shape().as_list()[2]  # == input_embed
        W_ref = tf.get_variable("W_ref", [1, hidden_dim, self.num_units],
                                initializer=self.initializer)
        W_q = tf.get_variable("W_q", [self.query_dim, self.num_units],
                              initializer=self.initializer)
        v = tf.get_variable("v", [self.num_units],
                            initializer=self.initializer)

        # Project the encoder outputs once; every decode step points into
        # these references. Shape: [batch_size, seq_length, num_units].
        encoded_ref = tf.nn.conv1d(encoded, W_ref, 1, "VALID")

        # Decoder state = encodings of the three most recently visited
        # positions; all zeros before the first step.
        query1 = tf.zeros((self.batch_size, hidden_dim))
        query2 = tf.zeros((self.batch_size, hidden_dim))
        query3 = tf.zeros((self.batch_size, hidden_dim))

        # Trajectory-update projections, one per remembered state.
        W_1 = tf.get_variable("W_1", [hidden_dim, self.query_dim],
                              initializer=self.initializer)
        W_2 = tf.get_variable("W_2", [hidden_dim, self.query_dim],
                              initializer=self.initializer)
        W_3 = tf.get_variable("W_3", [hidden_dim, self.query_dim],
                              initializer=self.initializer)

        for _ in range(self.max_length):  # sample one index per step
            # Fuse the three most recent states into a single query vector.
            query = tf.nn.relu(tf.matmul(query1, W_1) +
                               tf.matmul(query2, W_2) +
                               tf.matmul(query3, W_3))
            logits = pointer(encoded_ref=encoded_ref,
                             query=query,
                             mask=mask,
                             W_ref=W_ref,
                             W_q=W_q,
                             v=v,
                             C=config.C,
                             temperature=config.temperature)
            dist = distr.Categorical(logits)  # logits are the masked scores
            idx = dist.sample()

            idx_list.append(idx)
            log_probs.append(dist.log_prob(idx))
            entropies.append(dist.entropy())
            mask = mask + tf.one_hot(idx, self.max_length)  # forbid revisits

            # Pair each batch row with its sampled index for gather_nd.
            gather_idx = tf.stack(
                [tf.range(self.batch_size, dtype=tf.int32), idx], 1)
            # Slide the state window and pull in the newly chosen encoding.
            query3, query2 = query2, query1
            query1 = tf.gather_nd(encoded, gather_idx)

        idx_list.append(idx_list[0])  # close the tour back to the start
        self.tour = tf.stack(idx_list, axis=1)  # sampled permutations
        self.log_prob = tf.add_n(log_probs)  # summed log-probs for backprop
        self.entropies = tf.add_n(entropies)
        tf.summary.scalar('log_prob_mean', tf.reduce_mean(self.log_prob))
        tf.summary.scalar('entropies_mean', tf.reduce_mean(self.entropies))
# ---- Example n. 2 ----
 def r_net(self):
     """Build an R-Net-style machine-reading graph (Python 2 / TF 1.x).

     Pipeline: stacked BiGRU encoders for question and context, a
     question-aware fusion layer, a self-attention layer, and a pointer
     decoder producing start/end position scores.

     Side effect: sets ``self.pos_scores = [start_scores, end_scores]``.
     Reads ``self._hps``, ``self.question_inputs``, ``self.question_lens``,
     ``self.context_inputs``, ``self.context_lens``, ``self.dtype``.
     """
     hps = self._hps
     with tf.variable_scope('question_encoding'):
         q_rep = self.question_inputs
         # Final (fw, bw) states of each layer, reused to seed the
         # context encoder below.
         q_states = []
         for i in xrange(hps.num_layers):
             with tf.variable_scope('layer%d' % i):
                 # NOTE(review): the same GRUCell object is passed as both
                 # forward and backward cell — confirm the fw/bw scopes
                 # keep their weights separate in this TF version.
                 q_cell = tf.contrib.rnn.GRUCell(hps.size)
                 q_rep, q_state = tf.nn.bidirectional_dynamic_rnn(
                     q_cell,
                     q_cell,
                     q_rep,
                     sequence_length=self.question_lens,
                     dtype=self.dtype)
                 # Concatenate fw/bw outputs -> last dim 2 * hps.size.
                 q_rep = tf.concat(q_rep, axis=-1)
                 q_states.append(q_state)
         assert q_rep.get_shape()[-1].value == 2 * hps.size
     with tf.variable_scope('context_encoding'):
         c_rep = self.context_inputs
         for i in xrange(hps.num_layers):
             with tf.variable_scope('layer%d' % i):
                 c_cell = tf.contrib.rnn.GRUCell(hps.size)
                 # Each layer starts from the matching question layer's
                 # final states instead of zeros (dtype comes from them).
                 c_rep, c_state = tf.nn.bidirectional_dynamic_rnn(
                     c_cell,
                     c_cell,
                     c_rep,
                     initial_state_fw=q_states[i][0],
                     initial_state_bw=q_states[i][1],
                     sequence_length=self.context_lens)
                 c_rep = tf.concat(c_rep, axis=-1)
         assert c_rep.get_shape()[-1].value == 2 * hps.size
     with tf.variable_scope('question_aware'):
         q_a_cell = tf.contrib.rnn.GRUCell(hps.size)
         # Attend from context to question, then fuse attended vectors
         # back into the context (sfu = semantic fusion unit, presumably;
         # helper defined elsewhere) before re-encoding with a BiGRU.
         context_q = multihead_attention(c_rep, q_rep)
         inputs = sfu(c_rep, context_q)
         c_rep, state = tf.nn.bidirectional_dynamic_rnn(q_a_cell,
                                                        q_a_cell,
                                                        inputs,
                                                        self.context_lens,
                                                        dtype=self.dtype)
         c_rep = tf.concat(c_rep, axis=-1)
     with tf.variable_scope('self_attention'):
         s_a_cell = tf.contrib.rnn.GRUCell(hps.size)
         # Context self-matching: attend the context against itself,
         # fuse, and re-encode.
         context_c = multihead_attention(c_rep, c_rep)
         inputs = sfu(c_rep, context_c)
         c_rep, state = tf.nn.bidirectional_dynamic_rnn(s_a_cell,
                                                        s_a_cell,
                                                        inputs,
                                                        self.context_lens,
                                                        dtype=self.dtype)
         c_rep = tf.concat(c_rep, axis=-1)
         # if hps.mode == 'train':
         #     c_rep = tf.nn.dropout(c_rep, 1.0 - hps.dropout_rate)
         assert c_rep.get_shape()[-1].value == 2 * hps.size
     with tf.variable_scope('output_layer'):
         answer_cell = tf.contrib.rnn.GRUCell(2 * hps.size)
         with tf.variable_scope('pointer'):
             # Learned initial query for the answer pointer; note it is
             # sized by batch_size, so the graph is fixed to that batch.
             v_q = tf.get_variable('question_parameters',
                                   [hps.batch_size, 2 * hps.size],
                                   self.dtype,
                                   tf.truncated_normal_initializer())
             # First pointer pass over the question only initializes the
             # decoder state; its scores are discarded.
             _, state = pointer(q_rep, v_q, answer_cell)
             # Reuse so all three pointer calls share one weight set.
             tf.get_variable_scope().reuse_variables()
             start_pos_scores, state = pointer(c_rep, state, answer_cell)
             tf.get_variable_scope().reuse_variables()
             end_pos_scores, state = pointer(c_rep, state, answer_cell)
             self.pos_scores = [start_pos_scores, end_pos_scores]