Code Example #1
 def __init__(self, model_dimension, dropout_percentage, number_of_heads, feedforward_dimension, number_of_layers):
     """
         Creates number_of_layers copies of DecoderLayer.
         Args:
             model_dimension: model dimension, generally the same as the embedding_dimension of VocabEmbedding or FeatureEmbedding
             dropout_percentage: dropout percentage for the residual connections
             number_of_heads: number of heads for multiheaded attention
             feedforward_dimension: units in the feedforward layer, generally 2048
             number_of_layers: number of decoder layers
     """
     super(Decoder, self).__init__()
     self.dec_layers = clone(DecoderLayer(model_dimension, dropout_percentage, number_of_heads, feedforward_dimension), number_of_layers)
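All three snippets rely on a `clone` helper that is not shown here. Below is a minimal sketch of what it is assumed to do (deep-copy a module N times into an nn.ModuleList), together with an assumed Decoder.forward whose (memory, src_mask, trg_mask) signature is a guess based on the usual encoder-decoder interface:

 import copy
 import torch.nn as nn

 def clone(module, number_of_copies):
     # Deep-copy the module so every layer gets its own parameters, and wrap
     # the copies in an nn.ModuleList so PyTorch registers them as submodules.
     return nn.ModuleList(
         [copy.deepcopy(module) for _ in range(number_of_copies)])

 # Assumed Decoder.forward; the (memory, src_mask, trg_mask) arguments are an
 # assumption, not taken from the original code.
 def forward(self, x, memory, src_mask, trg_mask):
     # Run the target sequence through every decoder layer in turn; each layer
     # attends over the encoder output ("memory").
     for layer in self.dec_layers:
         x = layer(x, memory, src_mask, trg_mask)
     return x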
Code Example #2
 def __init__(self, model_dimension, dropout_percentage, number_of_heads,
              feedforward_dimension):
     """
         Creates 2 copies of ResidualConnection, a multiheaded attention module with number_of_heads heads, and a position-wise feedforward layer of shape (model_dimension, feedforward_dimension).
         Args:
             model_dimension: model dimension, generally the same as the embedding_dimension of VocabEmbedding or FeatureEmbedding
             dropout_percentage: dropout percentage for the residual connections
             number_of_heads: number of heads for multiheaded attention
             feedforward_dimension: units in the feedforward layer, generally 2048
     """
     super(EncoderLayer, self).__init__()
     self.res_layers = clone(
         ResidualConnection(model_dimension, dropout_percentage), 2)
     self.self_att = MultiheadedAttention(model_dimension, number_of_heads)
     self.feed_forward = PositionwiseFeedForward(model_dimension,
                                                 feedforward_dimension)
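For context, the EncoderLayer's forward pass presumably wires these pieces together roughly as below. The call signature of ResidualConnection (taking the input and a sublayer callable) is an assumption, following the common residual/sublayer pattern:

 def forward(self, x, mask):
     # The first residual connection wraps the self-attention sublayer; the
     # lambda defers the call so ResidualConnection can apply dropout (and any
     # normalization) around it.
     x = self.res_layers[0](x, lambda x: self.self_att(x, x, x, mask))
     # The second residual connection wraps the position-wise feedforward layer.
     return self.res_layers[1](x, self.feed_forward)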
Code Example #3
 def __init__(self, model_dimension, number_of_heads):
     """
         Creates 4 copies of a linear layer, used as the projection weights for Query, Key, Value and the concatenated multihead attention output.
         Multiple heads let the attention focus on several “representation subspaces”; the number of subspaces equals number_of_heads.
         To reduce computational cost, a single set of projection weights (the linear layers) is used for all heads rather than separate weights per head.
         Args:
             model_dimension: model dimension
             number_of_heads: number of heads in multiheaded attention
     """
     super(MultiheadedAttention, self).__init__()
     assert model_dimension % number_of_heads == 0
     self.model_dimension = model_dimension
     self.number_of_heads = number_of_heads
     self.d_k = model_dimension // number_of_heads
     # nn.Linear defaults to bias=True; whether the projections need a bias
     # is left as an open question here.
     self.linears = clone(nn.Linear(model_dimension, model_dimension), 4)
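Below is a sketch of how the four linear layers and d_k are typically used in the forward pass, following standard scaled dot-product multi-head attention; the exact method signature and mask convention are assumptions, and a module-level `import torch` is taken for granted:

 def forward(self, query, key, value, mask=None):
     batch_size = query.size(0)
     # Project Q, K and V with the first three linear layers, then split the
     # model dimension into number_of_heads heads of size d_k each.
     query, key, value = [
         layer(x).view(batch_size, -1, self.number_of_heads, self.d_k).transpose(1, 2)
         for layer, x in zip(self.linears, (query, key, value))]
     # Scaled dot-product attention, computed per head.
     scores = torch.matmul(query, key.transpose(-2, -1)) / (self.d_k ** 0.5)
     if mask is not None:
         scores = scores.masked_fill(mask == 0, float('-inf'))
     weights = torch.softmax(scores, dim=-1)
     x = torch.matmul(weights, value)
     # Concatenate the heads and apply the fourth linear layer.
     x = x.transpose(1, 2).contiguous().view(batch_size, -1, self.model_dimension)
     return self.linears[-1](x)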