def __init__(self, n_layers, n_head, d_k, d_v, d_model, d_inner, dropout=0.1):
    """Create the encoder as a stack of ``n_layers`` ``EncoderLayer`` modules.

    Args:
        n_layers: Number of ``EncoderLayer`` instances to build; passed
            directly to ``range``.
        n_head, d_k, d_v, d_model, d_inner: Forwarded unchanged to every
            ``EncoderLayer`` constructor.
        dropout: Forwarded to every ``EncoderLayer`` as the ``dropout``
            keyword argument. Defaults to 0.1.
    """
    super().__init__()

    # Every layer is constructed separately with the same hyper-parameters,
    # so each one is a distinct module object.
    encoder_layers = []
    for _ in range(n_layers):
        encoder_layers.append(
            EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        )

    # ModuleList registers the layers as submodules of this module.
    self.layer_stack = nn.ModuleList(encoder_layers)
def __init__(self, n_layers, n_head, d_k, d_v, d_model, d_inner, dropout=0.1):
    """Create the encoder stack together with its auxiliary projection modules.

    Besides the ``EncoderLayer`` stack, this stores integer copies of a few
    hyper-parameters and builds a linear layer and a softmax module.

    Args:
        n_layers: Number of ``EncoderLayer`` instances to build; also stored
            (as ``int``) in ``self.n_layer``.
        n_head: Forwarded to every ``EncoderLayer``; also stored (as ``int``)
            in ``self.n_head``.
        d_k, d_v, d_inner: Forwarded unchanged to every ``EncoderLayer``.
        d_model: Forwarded to every ``EncoderLayer``; also stored (as ``int``)
            in ``self.hidden_size``.
        dropout: Forwarded to every ``EncoderLayer`` as the ``dropout``
            keyword argument. Defaults to 0.1.
    """
    super().__init__()

    # Integer copies of the size hyper-parameters.
    self.n_layer = int(n_layers)
    self.n_head = int(n_head)
    self.hidden_size = int(d_model)
    # True division followed by int(): the quotient is truncated.
    self.head_size = int(self.hidden_size / self.n_head)
    # Fixed value; not derived from any constructor argument.
    self.seq_len = 256

    # Linear map from hidden_size features to 4 outputs per head.
    projection_width = self.n_head * 4
    self.linear = nn.Linear(self.hidden_size, projection_width)
    # Softmax over the last dimension of its input.
    self.softmax = nn.Softmax(dim=-1)

    # The layers receive the original (unconverted) constructor arguments.
    encoder_layers = []
    for _ in range(n_layers):
        encoder_layers.append(
            EncoderLayer(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        )
    self.layer_stack = nn.ModuleList(encoder_layers)