class NihongoNet(Model):
    """Transformer-style encoder-decoder model (2 encoder + 2 decoder layers).

    Token ids are embedded, passed through two stacked encoders, and the
    final encoder output is fed (as both the query stream and the
    cross-attention memory) through two stacked decoders, followed by a
    per-timestep softmax projection.

    Args:
        vocab_size: size of the input token vocabulary (embedding rows).
        model_dim: embedding / hidden width shared by all sub-layers.
        seq_len: fixed input sequence length expected by the embedding.
    """

    def __init__(self, vocab_size=30000, model_dim=3072, seq_len=128):
        super().__init__()
        # Kept for prep_build(), which needs the canonical activation shape.
        self._model_dim = model_dim
        self._seq_len = seq_len
        self._embedding = Embedding(vocab_size, model_dim, input_length=seq_len)
        self._encoder_0 = Encoder()
        self._encoder_1 = Encoder()
        self._decoder_0 = Decoder()
        self._decoder_1 = Decoder()
        # NOTE(review): the output projection has model_dim units, not
        # vocab_size — a softmax over 3072 classes cannot predict a 30000-word
        # vocabulary. Preserved as-is for compatibility; confirm intent.
        self._dense = TimeDistributed(Dense(model_dim, activation="softmax"))

    def call(self, x, training=False):
        """Run the full encode-decode pass.

        Args:
            x: integer token ids, expected shape (batch, seq_len).
            training: forwarded to sub-layers (e.g. for dropout).

        Returns:
            Softmax activations of shape (batch, seq_len, model_dim).
        """
        x = self._embedding(x)
        x = self._encoder_0(x, training=training)
        x = self._encoder_1(x, training=training)
        # The last encoder output doubles as cross-attention memory for
        # BOTH decoder layers.
        enc_out = x
        x = self._decoder_0(x, enc_out, training=training)
        x = self._decoder_1(x, enc_out, training=training)
        return self._dense(x)

    def prep_build(self, batch_size=32):
        """Eagerly build encoder/decoder sub-layers for a fixed input shape.

        Args:
            batch_size: batch dimension used for the build shape
                (default 32, matching the original hard-coded value).
        """
        shape = (batch_size, self._seq_len, self._model_dim)
        for layer in (self._encoder_0, self._encoder_1,
                      self._decoder_0, self._decoder_1):
            layer.build(shape)
def build_decoder(self, decoder_emb_inp, encoder_outputs, reuse=False):
    """Construct the decoder stack and run it on the given inputs.

    Args:
        decoder_emb_inp: embedded decoder input sequence.
        encoder_outputs: encoder memory attended to by the decoder.
        reuse: whether to reuse variables in the "Decoder" scope.

    Returns:
        The decoder's build() output for these inputs.
    """
    with tf.variable_scope("Decoder", reuse=reuse):
        stack = Decoder(
            num_layers=self.num_layers,
            num_heads=self.num_heads,
            linear_key_dim=self.linear_key_dim,
            linear_value_dim=self.linear_value_dim,
            model_dim=self.emb_dim,
            ffn_dim=self.ffn_dim,
        )
        return stack.build(decoder_emb_inp, encoder_outputs, self.x_len)
def build_decoder(self, decoder_emb_inp, encoder_outputs, dec_bias,
                  attention_bias, reuse=False):
    """Construct a bias-aware decoder stack and run it on the given inputs.

    Args:
        decoder_emb_inp: embedded decoder input sequence.
        encoder_outputs: encoder memory attended to by the decoder.
        dec_bias: self-attention bias for the decoder (e.g. causal mask).
        attention_bias: bias applied to encoder-decoder cross-attention.
        reuse: whether to reuse variables in the "Decoder" scope.

    Returns:
        The decoder's build() output for these inputs.
    """
    # The cross-attention bias is simply the supplied attention bias.
    enc_dec_bias = attention_bias
    scope = tf.variable_scope(
        "Decoder",
        reuse=reuse,
        initializer=tf.contrib.layers.xavier_initializer(),
    )
    with scope:
        stack = Decoder(
            num_layers=self.num_layers,
            num_heads=self.num_heads,
            linear_key_dim=self.linear_key_dim,
            linear_value_dim=self.linear_value_dim,
            model_dim=self.hidden_dim,
            ffn_dim=self.ffn_dim,
        )
        return stack.build(decoder_emb_inp, encoder_outputs,
                           dec_bias, enc_dec_bias)