def build_critic(self):
        """Build the critic head for the baseline estimate.

        Embeds and encodes ``self.input_``, pools the encoding with a
        glimpse (attention-style pooling, yielding [batch, input_embed]),
        and runs a two-layer feed-forward head. Stores the scalar
        predictions on ``self.predictions`` and emits a
        'predictions_mean' summary.
        """
        embedded = embed_seq(input_seq=self.input_,
                             from_=self.dimension,
                             to_=self.input_embed,
                             is_training=self.is_training,
                             BN=True,
                             initializer=self.initializer)
        encoded = encode_seq(input_seq=embedded,
                             input_dim=self.input_embed,
                             num_stacks=self.num_stacks,
                             num_heads=self.num_heads,
                             num_neurons=self.num_neurons,
                             is_training=self.is_training)
        # Glimpse pools the encoding into a single frame per batch row.
        frame = full_glimpse(
            ref=encoded,
            from_=self.input_embed,
            to_=self.num_units,
            initializer=tf.contrib.layers.xavier_initializer())

        # Two dense layers mapping the pooled frame to a scalar prediction.
        with tf.variable_scope("ffn"):
            hidden = tf.layers.dense(frame,
                                     self.num_neurons_critic,
                                     activation=tf.nn.relu,
                                     kernel_initializer=self.initializer)
            out_w = tf.get_variable("w1", [self.num_neurons_critic, 1],
                                    initializer=self.initializer)
            out_b = tf.Variable(self.init_B, name="b1")
            self.predictions = tf.squeeze(tf.matmul(hidden, out_w) + out_b)
            tf.summary.scalar('predictions_mean',
                              tf.reduce_mean(self.predictions))
# ----- Example #2 (original marker: "Beispiel #2", 0 votes) -----
 def forward(X, reuse=None):
     """Transformer encoder over token ids X, projected to vocab logits.

     NOTE(review): ``self`` is a free variable here — this function is
     presumably defined inside a method and closes over the model
     instance; confirm against the enclosing scope.
     """
     with tf.variable_scope('embed_seq', reuse=reuse):
         hidden = embed_seq(X,
                            self.vocab_size,
                            self.hidden_units,
                            zero_pad=True,
                            scale=True)
     with tf.variable_scope('pos_enc', reuse=reuse):
         # Learned (not sinusoidal) positional encoding, added in place.
         hidden += learned_positional_encoding(X,
                                               self.hidden_units,
                                               zero_pad=False,
                                               scale=False)
     hidden = tf.layers.dropout(hidden,
                                self.dropout_rate,
                                training=self.is_training)
     # n_layers blocks of self-attention followed by a position-wise FFN.
     for layer in range(self.n_layers):
         with tf.variable_scope('attn%d' % layer, reuse=reuse):
             hidden = self_multihead_attn(queries=hidden,
                                          keys=hidden,
                                          num_units=self.hidden_units,
                                          num_heads=self.num_heads,
                                          dropout_rate=self.dropout_rate,
                                          is_training=self.is_training)
         with tf.variable_scope('feedforward%d' % layer, reuse=reuse):
             # Standard 4x expansion then projection back to hidden_units.
             hidden = pointwise_feedforward(
                 hidden,
                 num_units=[4 * self.hidden_units, self.hidden_units],
                 activation=tf.nn.elu)
     return tf.layers.dense(hidden, self.vocab_size)
 def add_forward_path(self):
     """Build the encoder forward pass.

     Embeds ``self.X``, adds a learned positional encoding, applies
     dropout, runs ``num_blocks`` attention + feed-forward blocks, and
     stores the final ``n_out`` projection on ``self.logits``.
     """
     with tf.variable_scope('encoder_embedding'):
         net = embed_seq(self.X,
                         self.vocab_size,
                         self.hidden_units,
                         zero_pad=False,
                         scale=True)
     with tf.variable_scope('encoder_positional_encoding'):
         net += learned_positional_encoding(self.X,
                                            self.hidden_units,
                                            zero_pad=False,
                                            scale=False)
     with tf.variable_scope('encoder_dropout'):
         net = tf.layers.dropout(net,
                                 self.dropout_rate,
                                 training=self.is_training)
     # Stacked encoder blocks: multi-head attention then pointwise FFN.
     for block in range(self.num_blocks):
         with tf.variable_scope('encoder_attn_%d' % block):
             net = multihead_attn(queries=net,
                                  keys=net,
                                  num_units=self.hidden_units,
                                  num_heads=self.num_heads,
                                  dropout_rate=self.dropout_rate,
                                  is_training=self.is_training)
         with tf.variable_scope('encoder_feedforward_%d' % block):
             # No expansion here: hidden_units -> hidden_units.
             net = pointwise_feedforward(
                 net,
                 num_units=[self.hidden_units, self.hidden_units],
                 activation=tf.nn.elu)
     self.logits = tf.layers.dense(net, self.n_out)
    def encode_decode(self):
        """Build the actor: embed + encode the input, then autoregressively
        sample a tour with a pointer mechanism.

        Fix vs. original: ``if self.is_training == False`` replaced by the
        idiomatic ``if not self.is_training`` (PEP 8 — never compare to
        True/False with ``==``; the original would raise on bool() if
        is_training were a tensor, so it must be a Python bool here).

        Side effects (tensors stored on self):
            self.tour      -- sampled permutation, closed back to its start
            self.log_prob  -- summed log-probability of the tour (for REINFORCE)
            self.entropies -- summed per-step policy entropy
        Also emits 'log_prob_mean' and 'entropies_mean' summaries.

        NOTE(review): reads module-level ``config`` (C, temperature) rather
        than self.config — confirm that global exists at build time.
        """
        actor_embedding = embed_seq(input_seq=self.input_,
                                    from_=self.dimension,
                                    to_=self.input_embed,
                                    is_training=self.is_training,
                                    BN=True,
                                    initializer=self.initializer)
        actor_encoding = encode_seq(input_seq=actor_embedding,
                                    input_dim=self.input_embed,
                                    num_stacks=self.num_stacks,
                                    num_heads=self.num_heads,
                                    num_neurons=self.num_neurons,
                                    is_training=self.is_training)
        # At inference the encoding is replicated across the batch so each
        # row decodes its own stochastic tour from the same instance.
        if not self.is_training:
            actor_encoding = tf.tile(actor_encoding, [self.batch_size, 1, 1])

        idx_list, log_probs, entropies = [], [], [
        ]  # tours index, log_probs, entropies
        mask = tf.zeros((self.batch_size, self.max_length))  # mask for actions

        n_hidden = actor_encoding.get_shape().as_list()[2]  # input_embed
        # Pointer-attention parameters.
        W_ref = tf.get_variable("W_ref", [1, n_hidden, self.num_units],
                                initializer=self.initializer)
        W_q = tf.get_variable("W_q", [self.query_dim, self.num_units],
                              initializer=self.initializer)
        v = tf.get_variable("v", [self.num_units],
                            initializer=self.initializer)

        # Pre-project the references once (1x1 conv over the sequence).
        encoded_ref = tf.nn.conv1d(
            actor_encoding, W_ref, 1, "VALID"
        )  # actor_encoding is the ref for actions [Batch size, seq_length, n_hidden]
        # The query is built from the last three visited states.
        query1 = tf.zeros((self.batch_size, n_hidden))  # initial state
        query2 = tf.zeros((self.batch_size, n_hidden))  # previous state
        query3 = tf.zeros(
            (self.batch_size, n_hidden))  # previous previous state

        W_1 = tf.get_variable(
            "W_1", [n_hidden, self.query_dim],
            initializer=self.initializer)  # update trajectory (state)
        W_2 = tf.get_variable("W_2", [n_hidden, self.query_dim],
                              initializer=self.initializer)
        W_3 = tf.get_variable("W_3", [n_hidden, self.query_dim],
                              initializer=self.initializer)

        for step in range(self.max_length):  # sample from POINTER
            # Combine the three most recent states into the decoder query.
            query = tf.nn.relu(
                tf.matmul(query1, W_1) + tf.matmul(query2, W_2) +
                tf.matmul(query3, W_3))
            logits = pointer(encoded_ref=encoded_ref,
                             query=query,
                             mask=mask,
                             W_ref=W_ref,
                             W_q=W_q,
                             v=v,
                             C=config.C,
                             temperature=config.temperature)
            prob = distr.Categorical(logits)  # logits = masked_scores
            idx = prob.sample()

            idx_list.append(idx)  # tour index
            log_probs.append(prob.log_prob(idx))  # log prob
            entropies.append(prob.entropy())  # entropies
            # Masking prevents revisiting an already-chosen position.
            mask = mask + tf.one_hot(idx, self.max_length)  # mask

            idx_ = tf.stack([tf.range(self.batch_size, dtype=tf.int32), idx],
                            1)  # idx with batch
            # Shift the state window: newest selection becomes query1.
            query3 = query2
            query2 = query1
            query1 = tf.gather_nd(actor_encoding,
                                  idx_)  # update trajectory (state)

        idx_list.append(idx_list[0])  # return to start
        self.tour = tf.stack(idx_list, axis=1)  # permutations
        self.log_prob = tf.add_n(
            log_probs)  # corresponding log-probability for backprop
        self.entropies = tf.add_n(entropies)
        tf.summary.scalar('log_prob_mean', tf.reduce_mean(self.log_prob))
        tf.summary.scalar('entropies_mean', tf.reduce_mean(self.entropies))