def __init__(self, num_layers, hidden_size, attn_type, copy_attn,
             cnn_kernel_width, dropout, embeddings, copy_attn_type):
    """Assemble the convolutional decoder's sub-modules.

    Args:
        num_layers: how many GatedConv layers, and equally how many
            ConvMultiStepAttention layers, are stacked.
        hidden_size: output width of the input projection; also handed
            to every conv and attention layer.
        attn_type: accepted but not read by this constructor.
        copy_attn: must be falsy; a truthy value trips the assertion
            below.
        cnn_kernel_width: stored on the instance and forwarded to each
            GatedConv.
        dropout: forwarded to each GatedConv.
        embeddings: embedding module; its ``embedding_size`` attribute
            sets the input width of the linear projection.
        copy_attn_type: forwarded as ``attn_type`` to GlobalAttention
            when copy attention is built.

    Raises:
        AssertionError: if ``copy_attn`` is truthy (only while
            assertions are enabled).
    """
    super(CNNDecoder, self).__init__()

    self.cnn_kernel_width = cnn_kernel_width
    self.embeddings = embeddings

    # Decoder state, empty at construction time.
    self.state = {}

    # Project the embeddings to the decoder width.
    emb_dim = self.embeddings.embedding_size
    self.linear = nn.Linear(emb_dim, hidden_size)

    # Sub-modules are created in the same order as before (all convs,
    # then all attentions) so parameter registration is unchanged.
    conv_stack = []
    for _ in range(num_layers):
        conv_stack.append(
            GatedConv(hidden_size, cnn_kernel_width, dropout, True))
    self.conv_layers = nn.ModuleList(conv_stack)

    attn_stack = []
    for _ in range(num_layers):
        attn_stack.append(ConvMultiStepAttention(hidden_size))
    self.attn_layers = nn.ModuleList(attn_stack)

    # CNNDecoder has its own attention mechanism; copy attention would
    # be a separate layer, but it is currently refused outright.
    assert not copy_attn, "Copy mechanism not yet tested in conv2conv"
    self.copy_attn = (
        GlobalAttention(hidden_size, attn_type=copy_attn_type)
        if copy_attn else None
    )
def __init__(self, num_layers, hidden_size, attn_type, copy_attn,
             cnn_kernel_width, dropout, embeddings):
    """Build the convolutional decoder's sub-modules.

    Args:
        num_layers: number of GatedConv layers and of
            ConvMultiStepAttention layers to create.
        hidden_size: output width of the input projection; also passed
            to every conv and attention layer.
        attn_type: forwarded as ``attn_type`` to the copy-attention
            GlobalAttention layer when ``copy_attn`` is truthy.
        copy_attn: when truthy, a separate copy-attention layer is
            created and ``self._copy`` is set to True.
        cnn_kernel_width: forwarded to each GatedConv.
        dropout: forwarded to each GatedConv.
        embeddings: embedding module; its ``embedding_size`` attribute
            sets the input width of the linear projection.
    """
    super(CNNDecoder, self).__init__()

    # Basic attributes.
    self.decoder_type = 'cnn'
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.cnn_kernel_width = cnn_kernel_width
    self.embeddings = embeddings
    self.dropout = dropout

    # Build the CNN.
    input_size = self.embeddings.embedding_size
    self.linear = nn.Linear(input_size, self.hidden_size)
    self.conv_layers = nn.ModuleList([
        GatedConv(self.hidden_size, self.cnn_kernel_width,
                  self.dropout, True)
        for _ in range(self.num_layers)
    ])
    self.attn_layers = nn.ModuleList([
        onmt.modules.ConvMultiStepAttention(self.hidden_size)
        for _ in range(self.num_layers)
    ])

    # CNNDecoder has its own attention mechanism.
    # Set up a separate copy attention layer, if needed.
    self._copy = bool(copy_attn)
    if self._copy:
        self.copy_attn = onmt.modules.GlobalAttention(
            hidden_size, attn_type=attn_type)
    else:
        # Always define the attribute so that reading it on a decoder
        # built without copy attention yields None instead of raising
        # AttributeError.
        self.copy_attn = None