def _build_graph(self):
    """ Defines the model graph. """
    # Initialize layers
    with tf.variable_scope(self.name):
        for layer_id in range(1, self.config.transformer_dec_depth + 1):
            layer_name = 'layer_{:d}'.format(layer_id)
            # Check if constructed layer is final
            if layer_id == self.config.transformer_dec_depth:
                self.is_final_layer = True
            # Specify ffn dimensions sequence
            ffn_dims = [self.config.transformer_ffn_hidden_size, self.config.state_size]
            with tf.variable_scope(layer_name):
                # Build layer blocks (see layers.py)
                self_attn_block = AttentionBlock(self.config,
                                                 self.float_dtype,
                                                 self_attention=True,
                                                 training=self.training)
                cross_attn_block = AttentionBlock(self.config,
                                                  self.float_dtype,
                                                  self_attention=False,
                                                  training=self.training,
                                                  from_rnn=self.from_rnn)
                ffn_block = FFNBlock(self.config,
                                     ffn_dims,
                                     self.float_dtype,
                                     is_final=self.is_final_layer,
                                     training=self.training)

                # Maintain layer-wise dict entries for easier data-passing (may change later)
                self.decoder_stack[layer_id] = dict()
                self.decoder_stack[layer_id]['self_attn'] = self_attn_block
                self.decoder_stack[layer_id]['cross_attn'] = cross_attn_block
                self.decoder_stack[layer_id]['ffn'] = ffn_block
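
# --- Illustration (not part of the original graph construction) ---
# A minimal sketch of how the layer-wise dict built above might be consumed
# during decoding. The method name `_decode_sketch` and the blocks'
# `forward(...)` signatures are assumptions for illustration only; the real
# interfaces live in layers.py.
def _decode_sketch(self, dec_inputs, enc_memories, self_attn_mask, cross_attn_mask):
    """ Illustrative only: runs inputs through each decoder layer in order. """
    layer_out = dec_inputs
    for layer_id in range(1, self.config.transformer_dec_depth + 1):
        layer = self.decoder_stack[layer_id]
        # Masked self-attention over the decoder states ...
        layer_out = layer['self_attn'].forward(layer_out, None, self_attn_mask)
        # ... cross-attention over the encoder memories ...
        layer_out = layer['cross_attn'].forward(layer_out, enc_memories, cross_attn_mask)
        # ... followed by the position-wise feed-forward block
        layer_out = layer['ffn'].forward(layer_out)
    return layer_out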
def _build_graph(self):
    """ Defines the model graph. """
    # Initialize layers
    with tf.compat.v1.variable_scope(self.name):
        if self.config.transformer_dropout_embeddings > 0:
            self.dropout_embedding = tf.keras.layers.Dropout(
                rate=self.config.transformer_dropout_embeddings)
        else:
            self.dropout_embedding = None

        for layer_id in range(1, self.config.transformer_enc_depth + 1):
            layer_name = 'layer_{:d}'.format(layer_id)
            # Check if constructed layer is final
            if layer_id == self.config.transformer_enc_depth:
                self.is_final_layer = True
            # Specify ffn dimensions sequence
            ffn_dims = [self.config.transformer_ffn_hidden_size,
                        self.config.state_size]
            with tf.compat.v1.variable_scope(layer_name):
                # Build layer blocks (see layers.py)
                self_attn_block = AttentionBlock(self.config,
                                                 FLOAT_DTYPE,
                                                 self_attention=True,
                                                 training=self.training)
                ffn_block = FFNBlock(self.config,
                                     ffn_dims,
                                     FLOAT_DTYPE,
                                     is_final=self.is_final_layer,
                                     training=self.training)

                # Maintain layer-wise dict entries for easier data-passing (may change later)
                self.encoder_stack[layer_id] = dict()
                self.encoder_stack[layer_id]['self_attn'] = self_attn_block
                self.encoder_stack[layer_id]['ffn'] = ffn_block
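
# --- Illustration (not part of the original graph construction) ---
# A matching sketch for the encoder side: when enabled, the Dropout layer
# created above regularizes the embeddings before they enter the stack.
# `_encode_sketch` and the blocks' `forward(...)` signatures are assumed
# for illustration only.
def _encode_sketch(self, source_embeddings, self_attn_mask):
    """ Illustrative only: embedding dropout followed by the encoder stack. """
    layer_out = source_embeddings
    if self.dropout_embedding is not None:
        # `training` switches dropout on during training, off at inference
        layer_out = self.dropout_embedding(layer_out, training=self.training)
    for layer_id in range(1, self.config.transformer_enc_depth + 1):
        layer = self.encoder_stack[layer_id]
        # Self-attention over the source states, then the feed-forward block
        layer_out = layer['self_attn'].forward(layer_out, None, self_attn_mask)
        layer_out = layer['ffn'].forward(layer_out)
    return layer_out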