def __init__(self, size, self_attn, src_attn, feed_forward, dropout: float):
    super(DecoderLayer, self).__init__()
    self.size = size
    self.self_attn = self_attn
    self.src_attn = src_attn
    self.feed_forward = feed_forward
    self.sublayer: nn.ModuleList = clones(
        module=SublayerConnection(size, dropout),
        num_clones=SUBLAYERS_IN_DECODER_LAYER,
    )
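# Sketch: how DecoderLayer.forward typically wires these modules together.
# Assumptions (not shown in this file): SublayerConnection(x, fn) applies a
# residual connection around fn with layer normalization and dropout, and
# `memory` is the output of the encoder stack.
def forward(self, x, memory, src_mask, tgt_mask):
    m = memory
    # 1) masked self-attention over the decoder input
    x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, tgt_mask))
    # 2) attention over the encoder memory
    x = self.sublayer[1](x, lambda x: self.src_attn(x, m, m, src_mask))
    # 3) position-wise feed-forward
    return self.sublayer[2](x, self.feed_forward)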
def __init__(self, num_attn_heads: int, d_model: int, dropout=0.1):
    """Take in model size and number of attention heads."""
    super(MultiHeadedAttention, self).__init__()
    assert d_model % num_attn_heads == 0
    # We assume d_v always equals d_k.
    self.d_k = d_model // num_attn_heads
    self.num_attn_heads = num_attn_heads
    # Four d_model -> d_model projections (query, key, value, and the final
    # output); each is applied to the last dimension of its input.
    self.linears = clones(nn.Linear(d_model, d_model), 4)
    self.attn = None
    self.dropout = nn.Dropout(p=dropout)
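# Sketch: how MultiHeadedAttention.forward typically uses the four linear
# layers above (query, key, value projections plus the final output projection).
# Assumption: an `attention` helper implementing scaled dot-product attention,
# softmax(QK^T / sqrt(d_k)) V, exists elsewhere in the codebase; it is not shown here.
def forward(self, query, key, value, mask=None):
    if mask is not None:
        mask = mask.unsqueeze(1)  # the same mask is applied to every head
    nbatches = query.size(0)
    # Project q, k, v and reshape to (batch, heads, seq_len, d_k).
    query, key, value = [
        lin(x).view(nbatches, -1, self.num_attn_heads, self.d_k).transpose(1, 2)
        for lin, x in zip(self.linears, (query, key, value))
    ]
    # Attend per head, then concatenate heads and apply the output projection.
    x, self.attn = attention(query, key, value, mask=mask, dropout=self.dropout)
    x = x.transpose(1, 2).contiguous().view(nbatches, -1, self.num_attn_heads * self.d_k)
    return self.linears[-1](x)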
def __init__(self, size: int, self_attn, feed_forward, dropout: float):
    """
    Args:
        size: the layer's feature dimension (d_model), also used by the sublayer norms
        self_attn: self-attention module
        feed_forward: position-wise feed-forward module
        dropout: dropout rate
    """
    super(EncoderLayer, self).__init__()
    self.self_attn = self_attn
    self.feed_forward = feed_forward
    self.sublayer = clones(
        module=SublayerConnection(size, dropout),
        num_clones=SUBLAYERS_IN_ENCODER_LAYER,
    )
    self.size = size
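# Sketch: the usual EncoderLayer.forward, again assuming SublayerConnection(x, fn)
# wraps fn in a residual connection with layer normalization and dropout.
def forward(self, x, mask):
    # 1) self-attention sublayer
    x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, mask))
    # 2) position-wise feed-forward sublayer
    return self.sublayer[1](x, self.feed_forward)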
def __init__(self, layer: nn.Module, num_layers: int):
    super(Encoder, self).__init__()
    self.layers: nn.ModuleList = clones(layer, num_layers)
    self.norm: nn.Module = LayerNorm(layer.size)
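# Sketch: Encoder.forward typically passes the input (and mask) through each
# cloned layer in turn and applies the final LayerNorm to the result.
def forward(self, x, mask):
    for layer in self.layers:
        x = layer(x, mask)
    return self.norm(x)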
def __init__(self, layer, num_layers: int):
    super(Decoder, self).__init__()
    self.layers = clones(layer, num_layers)
    self.norm = LayerNorm(layer.size)
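# Sketch: Decoder.forward mirrors the encoder, additionally taking the encoder
# output (`memory`) and both the source and target masks.
def forward(self, x, memory, src_mask, tgt_mask):
    for layer in self.layers:
        x = layer(x, memory, src_mask, tgt_mask)
    return self.norm(x)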