def __init__(self, embedding, positional_embedding, layer, num_layers, dropout):
    super(TransformerEncoder, self).__init__()
    self.embedding = embedding
    self.positional_embedding = positional_embedding
    # embed_size = embedding.embedding_dim
    hidden_size = layer.hidden_size
    # self.input_projection = nn.Linear(embed_size, hidden_size)
    self.layers = clone(layer, num_layers)       # num_layers independent copies of the encoder layer
    self.layer_norm = nn.LayerNorm(hidden_size)  # final normalization of the encoder output
    # self.output_projection = nn.Linear(hidden_size, embed_size)
    self.dropout = dropout
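# The clone helper referenced above is not defined in this section. A minimal
# sketch, assuming (as in the usual Transformer layer-stacking pattern) that it
# simply deep-copies a module n times into an nn.ModuleList:
import copy
import torch.nn as nn

def clone(module, n):
    """Return an nn.ModuleList holding n independent deep copies of module."""
    return nn.ModuleList([copy.deepcopy(module) for _ in range(n)])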
def __init__(self, embedding, positional_embedding, layer, num_layers, dropout,
             share_decoder_embedding=True):
    super(ConvDecoder, self).__init__()
    self.embedding = embedding
    self.positional_embedding = positional_embedding
    embed_size = embedding.embedding_dim
    vocab_size = embedding.num_embeddings
    hidden_size = layer.hidden_size
    self.input_projection = nn.Linear(embed_size, hidden_size)   # embed_size -> hidden_size
    self.layers = clone(layer, num_layers)                       # num_layers independent copies of the decoder layer
    self.layer_norm = nn.LayerNorm(hidden_size)
    self.output_projection = nn.Linear(hidden_size, embed_size)  # hidden_size -> embed_size
    self.dropout = dropout
    self.generator = nn.Linear(embed_size, vocab_size)           # projects back to vocabulary logits
    if share_decoder_embedding:
        # tie the generator's weight matrix to the input embedding
        self.generator.weight = embedding.weight
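# Weight-tying sketch (illustration only, sizes are hypothetical): the generator's
# (vocab_size, embed_size) weight matrix has the same shape as embedding.weight,
# so assigning one Parameter to the other makes them share storage and gradients.
import torch.nn as nn

vocab_size, embed_size = 10000, 512
embedding = nn.Embedding(vocab_size, embed_size)
generator = nn.Linear(embed_size, vocab_size)
generator.weight = embedding.weight
assert generator.weight.data_ptr() == embedding.weight.data_ptr()  # same underlying tensor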
def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=False,
             dropout=0, bidirectional=True):
    super(ResidualGRU, self).__init__()
    # Note: despite the class name, the stacked layers are single-layer nn.LSTM modules.
    self.lstm_layers = clone(
        nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, num_layers=1,
                bias=bias, batch_first=batch_first, bidirectional=bidirectional),
        num_layers)
    self.input_projection = nn.Linear(input_size, hidden_size)  # maps inputs to the size the LSTM layers expect
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.bias = bias
    self.batch_first = batch_first
    self.dropout = dropout
    self.bidirectional = bidirectional
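# A possible residual forward pass, not taken from the source: project the input once,
# then add each layer's output back to its input. Because the cloned LSTMs are
# bidirectional, the two directional halves are summed here (an assumption) so the
# residual addition is shape-compatible with the hidden_size-wide input.
import torch
import torch.nn.functional as F

def residual_forward(module, x):
    """module: a ResidualGRU instance; x: (seq_len, batch, input_size) with batch_first=False."""
    h = module.input_projection(x)                     # (seq_len, batch, hidden_size)
    for lstm in module.lstm_layers:
        out, _ = lstm(h)                               # (seq_len, batch, 2 * hidden_size)
        out = out[..., :module.hidden_size] + out[..., module.hidden_size:]  # sum the two directions
        h = h + F.dropout(out, p=module.dropout, training=module.training)   # residual connection
    return h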