def forward(X, reuse=None):
    """Build a transformer-encoder forward pass over token ids `X`.

    Pipeline: token embedding + learned positional encoding, dropout,
    then `self.n_layers` blocks of (multi-head self-attention,
    position-wise feed-forward), finally a dense projection to
    per-position vocabulary logits.

    NOTE(review): this function reads `self.*` hyper-parameters but does
    not take `self` as a parameter — presumably it is defined inside a
    method/closure where `self` is captured; confirm against the caller.

    Args:
        X: token-id tensor fed to `embed_seq` (shape assumed
           (batch, seq_len) — TODO confirm against embed_seq).
        reuse: passed to every `tf.variable_scope` so the graph can be
           rebuilt sharing the same variables.

    Returns:
        Logits tensor from the final dense layer (last dim = vocab_size).
    """
    with tf.variable_scope('embed_seq', reuse=reuse):
        # zero_pad / scale semantics are defined by embed_seq itself.
        h = embed_seq(X, self.vocab_size, self.hidden_units,
                      zero_pad=True, scale=True)
    with tf.variable_scope('pos_enc', reuse=reuse):
        # Positions are added to, not concatenated with, the embeddings.
        h += learned_positional_encoding(X, self.hidden_units,
                                         zero_pad=False, scale=False)
    h = tf.layers.dropout(h, self.dropout_rate, training=self.is_training)

    for layer in range(self.n_layers):
        with tf.variable_scope('attn%d' % layer, reuse=reuse):
            h = self_multihead_attn(queries=h,
                                    keys=h,
                                    num_units=self.hidden_units,
                                    num_heads=self.num_heads,
                                    dropout_rate=self.dropout_rate,
                                    is_training=self.is_training)
        with tf.variable_scope('feedforward%d' % layer, reuse=reuse):
            # Standard transformer expansion: 4x hidden, back to hidden.
            h = pointwise_feedforward(
                h,
                num_units=[4 * self.hidden_units, self.hidden_units],
                activation=tf.nn.elu)

    # Per-position projection onto the vocabulary.
    return tf.layers.dense(h, self.vocab_size)
def add_forward_path(self):
    """Build the encoder graph from `self.X` and store `self.logits`.

    Pipeline: token embedding + learned positional encoding, dropout,
    then `self.num_blocks` blocks of (multi-head attention,
    position-wise feed-forward), and a final dense layer of width
    `self.n_out`. Side effect only: sets `self.logits`; returns None.
    """
    with tf.variable_scope('encoder_embedding'):
        # zero_pad / scale semantics are defined by embed_seq itself.
        enc = embed_seq(self.X, self.vocab_size, self.hidden_units,
                        zero_pad=False, scale=True)
    with tf.variable_scope('encoder_positional_encoding'):
        enc += learned_positional_encoding(self.X, self.hidden_units,
                                           zero_pad=False, scale=False)
    with tf.variable_scope('encoder_dropout'):
        enc = tf.layers.dropout(enc, self.dropout_rate,
                                training=self.is_training)

    for block in range(self.num_blocks):
        with tf.variable_scope('encoder_attn_%d' % block):
            # Self-attention: queries and keys are the same tensor.
            enc = multihead_attn(queries=enc,
                                 keys=enc,
                                 num_units=self.hidden_units,
                                 num_heads=self.num_heads,
                                 dropout_rate=self.dropout_rate,
                                 is_training=self.is_training)
        with tf.variable_scope('encoder_feedforward_%d' % block):
            # NOTE(review): inner width equals hidden_units here (no 4x
            # expansion) — presumably intentional for this model; confirm.
            enc = pointwise_feedforward(
                enc,
                num_units=[self.hidden_units, self.hidden_units],
                activation=tf.nn.elu)

    self.logits = tf.layers.dense(enc, self.n_out)