Example #1
 def _encoder_decoder_attention(self, q, k, v):
     # Cross-attention sub-layer: decoder queries (q) attend over the
     # encoder's outputs (k, v); no causal mask is needed here.
     with tf.variable_scope("encoder-decoder-attention"):
         attention = Attention(num_heads=self.num_heads,
                               masked=False,
                               linear_key_dim=self.linear_key_dim,
                               linear_value_dim=self.linear_value_dim,
                               model_dim=self.model_dim,
                               dropout=self.dropout)
         return attention.multi_head(q, k, v)
Example #2
 def _masked_self_attention(self, q, k, v):
     # Decoder self-attention sub-layer: q, k, and v all come from the
     # decoder input, and masked=True requests a causal (look-ahead) mask.
     with tf.variable_scope("masked-self-attention"):
         attention = Attention(
             num_heads=self.num_heads,
             masked=True,  # Not implemented yet
             linear_key_dim=self.linear_key_dim,
             linear_value_dim=self.linear_value_dim,
             model_dim=self.model_dim,
             dropout=self.dropout)
         return attention.multi_head(q, k, v)
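
For context, a Transformer decoder block would typically chain the two methods above: masked self-attention first, then encoder-decoder attention. The sketch below is hypothetical; the helper names _decoder_block, _add_and_norm, and _positional_feed_forward are assumptions for illustration and do not appear in the original snippets.

 def _decoder_block(self, decoder_inputs, encoder_outputs):
     # 1) Masked self-attention over the decoder's own inputs.
     out = self._masked_self_attention(decoder_inputs, decoder_inputs, decoder_inputs)
     out = self._add_and_norm(decoder_inputs, out)   # residual + layer norm (assumed helper)
     # 2) Encoder-decoder attention: queries from the decoder,
     #    keys/values from the encoder outputs.
     attn = self._encoder_decoder_attention(out, encoder_outputs, encoder_outputs)
     out = self._add_and_norm(out, attn)
     # 3) Position-wise feed-forward sub-layer (assumed helper).
     return self._add_and_norm(out, self._positional_feed_forward(out))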
Example #3
 def _self_attention(self, q, k, v, key_masks):
     # Encoder self-attention sub-layer: unmasked attention over the full
     # sequence; key_masks marks padding positions to be ignored.
     with tf.variable_scope("self-attention"):
         attention = Attention(
             num_heads=self.num_heads,
             masked=False,
             linear_key_dim=self.linear_key_dim,
             linear_value_dim=self.linear_value_dim,
             model_dim=self.model_dim,
             max_seq_len=self.max_seq_len,
             dropout=self.dropout,
         )
         return attention.multi_head(q, k, v, key_masks)
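
The Attention class itself is not shown in these snippets. The following is a minimal, self-contained sketch of what its constructor and multi_head() plausibly do, inferred only from the call sites above (TensorFlow 1.x style). The projection layout, mask handling, and dropout placement are assumptions, not the original implementation.

 import tensorflow as tf  # TF 1.x, matching the tf.variable_scope usage above

 class Attention:
     """Multi-head attention sketch; argument names inferred from the calls above."""

     def __init__(self, num_heads=8, masked=False, linear_key_dim=512,
                  linear_value_dim=512, model_dim=512, dropout=0.1,
                  max_seq_len=None):
         assert linear_key_dim % num_heads == 0
         assert linear_value_dim % num_heads == 0
         self.num_heads = num_heads
         self.masked = masked
         self.linear_key_dim = linear_key_dim
         self.linear_value_dim = linear_value_dim
         self.model_dim = model_dim
         self.dropout = dropout
         self.max_seq_len = max_seq_len  # unused in this sketch

     def multi_head(self, q, k, v, key_masks=None):
         # Linear projections of queries, keys, and values.
         q = tf.layers.dense(q, self.linear_key_dim, use_bias=False)
         k = tf.layers.dense(k, self.linear_key_dim, use_bias=False)
         v = tf.layers.dense(v, self.linear_value_dim, use_bias=False)

         # Split the last dimension into heads:
         # [batch, seq, dim] -> [batch, heads, seq, dim // heads]
         split = lambda x: tf.stack(tf.split(x, self.num_heads, axis=-1), axis=1)
         qh, kh, vh = split(q), split(k), split(v)

         # Scaled dot-product attention logits: [batch, heads, q_len, k_len]
         head_dim = self.linear_key_dim // self.num_heads
         logits = tf.matmul(qh, kh, transpose_b=True) / (head_dim ** 0.5)

         if self.masked:
             # Causal mask: forbid attention to future positions.
             k_len = tf.shape(logits)[-1]
             causal = tf.linalg.band_part(tf.ones(tf.stack([k_len, k_len])), -1, 0)
             logits += (1.0 - causal) * -1e9
         if key_masks is not None:
             # key_masks: [batch, k_len], 1 for real tokens, 0 for padding.
             pad = 1.0 - tf.cast(key_masks, tf.float32)
             logits += pad[:, None, None, :] * -1e9

         # Training-time dropout on the attention weights; a real
         # implementation would gate this on a training flag.
         weights = tf.nn.dropout(tf.nn.softmax(logits),
                                 keep_prob=1.0 - self.dropout)
         context = tf.matmul(weights, vh)  # [batch, heads, q_len, head_dim]

         # Merge heads and project back to model_dim.
         context = tf.concat(tf.unstack(context, num=self.num_heads, axis=1), axis=-1)
         return tf.layers.dense(context, self.model_dim)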