def __init__(self, size, self_attn, src_attn, feed_forward):
    """Assemble a layer from two attention modules and a feed-forward module.

    Args:
        size: Width handed to ``SublayerConnection``; also kept on the
            instance as ``self.size``.
        self_attn: Stored as ``self.self_attn`` (presumably the
            self-attention module -- confirm against the forward pass).
        src_attn: Stored as ``self.src_attn`` (presumably attention over
            the encoder output -- confirm against the forward pass).
        feed_forward: Stored as ``self.feed_forward``.
    """
    super().__init__()
    self.self_attn = self_attn
    self.src_attn = src_attn
    self.feed_forward = feed_forward
    # A single SublayerConnection template is handed to clones() with a
    # count of three -- presumably one wrapper per sub-module stored above.
    connection_template = SublayerConnection(size)
    self.sublayer = clones(connection_template, 3)
    self.size = size
def __init__(self, num_heads, dim_model):
    """Record the head count and per-head width, and build four linear layers.

    Args:
        num_heads: Number of attention heads; must divide ``dim_model``
            evenly.
        dim_model: Model width; used as both the input and output size of
            every linear layer created here.

    Raises:
        AssertionError: If ``dim_model`` is not a multiple of ``num_heads``.
    """
    super().__init__()
    assert dim_model % num_heads == 0
    self.num_heads = num_heads
    # d_v is assumed to always equal d_k, so one per-head width is kept.
    self.d_k = dim_model // num_heads
    # Four dim_model -> dim_model layers, obtained by cloning one template.
    projection_template = nn.Linear(dim_model, dim_model)
    self.linears = clones(projection_template, 4)
def __init__(self, dim_model, self_attn, feed_forward):
    """Assemble a layer from one attention module and a feed-forward module.

    Args:
        dim_model: Width handed to ``SublayerConnection``; also kept on the
            instance as ``self.dim_model``.
        self_attn: Stored as ``self.self_attn`` (presumably the
            self-attention module -- confirm against the forward pass).
        feed_forward: Stored as ``self.feed_forward``.
    """
    super().__init__()
    self.dim_model = dim_model
    self.self_attn = self_attn
    self.feed_forward = feed_forward
    # A single SublayerConnection template is handed to clones() with a
    # count of two -- presumably one wrapper per sub-module stored above.
    connection_template = SublayerConnection(dim_model)
    self.sublayer = clones(connection_template, 2)
def __init__(self, layer, num_layers):
    """Store the cloned layer stack and a LayerNorm sized from the template.

    Args:
        layer: Template passed to ``clones``; must expose a ``dim_model``
            attribute, which is used as the LayerNorm's normalized shape.
        num_layers: Count passed to ``clones`` for the layer stack.
    """
    super().__init__()
    self.layers = clones(layer, num_layers)
    # The norm takes its width from the template layer.
    norm_width = layer.dim_model
    self.norm = nn.LayerNorm(norm_width)
def __init__(self, layer, num_layers):
    """Store the cloned layer stack and a LayerNorm sized from the template.

    Args:
        layer: Template passed to ``clones``; must expose a ``size``
            attribute, which is used as the LayerNorm's normalized shape.
        num_layers: Count passed to ``clones`` for the layer stack.
    """
    # Zero-argument super() matches the other constructors in this file and
    # does not depend on the class name staying bound to this class.
    super().__init__()
    self.layers = clones(layer, num_layers)
    self.norm = nn.LayerNorm(layer.size)