import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.distributions import Normal
from torch.nn.utils.rnn import pack_padded_sequence


def to_gpu(gpu, var):
    # minimal stand-in for the repo's to_gpu helper
    return var.cuda() if gpu else var


class Seq2Seq2Decoder(nn.Module):

    def encode(self, indices, lengths, noise):
        embeddings = self.embedding(indices)
        packed_embeddings = pack_padded_sequence(input=embeddings,
                                                 lengths=lengths,
                                                 batch_first=True)

        # Encode
        packed_output, state = self.encoder(packed_embeddings)
        hidden, cell = state
        # batch_size x nhidden
        hidden = hidden[-1]  # get hidden state of last layer of encoder

        # normalize to unit ball (l2 norm of 1) - p=2, dim=1
        norms = torch.norm(hidden, 2, 1)
        # norms has shape (batch_size,); unsqueeze so it broadcasts over the
        # hidden dimension. (Older PyTorch kept the reduced dim, so
        # torch.div(hidden, norms.expand_as(hidden)) worked directly.)
        hidden = torch.div(hidden, norms.unsqueeze(1).expand_as(hidden))

        if noise and self.noise_r > 0:
            # older API: torch.normal(means=torch.zeros(hidden.size()),
            #                         std=self.noise_r)
            gauss_noise = Normal(torch.zeros(hidden.size()), self.noise_r)
            hidden = hidden + to_gpu(self.gpu, Variable(gauss_noise.sample()))

        return hidden
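    # Usage sketch for encode() (hypothetical names: `model`, `source`,
    # `lengths`; assumes `source` is a padded LongTensor of token ids sorted
    # by decreasing length, as pack_padded_sequence expects):
    #
    #   hidden = model.encode(source, lengths, noise=True)
    #   # hidden: (batch_size, nhidden), rows normalized to unit l2 norm and,
    #   # with noise=True, perturbed by N(0, noise_r) Gaussian noise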
    def __init__(self, emsize, nhidden, ntokens, nlayers, noise_r=0.2,
                 share_decoder_emb=False, hidden_init=False, dropout=0,
                 gpu=False):
        super(Seq2Seq2Decoder, self).__init__()
        self.nhidden = nhidden
        self.emsize = emsize
        self.ntokens = ntokens
        self.nlayers = nlayers
        self.noise_r = noise_r
        self.hidden_init = hidden_init
        self.dropout = dropout
        self.gpu = gpu

        # placeholder batch of <sos> symbols; resized to the real batch size
        # in generate()
        self.start_symbols = to_gpu(gpu, Variable(torch.ones(10, 1).long()))

        # Vocabulary embeddings: one for the encoder, one per decoder
        self.embedding = nn.Embedding(ntokens, emsize)
        self.embedding_decoder1 = nn.Embedding(ntokens, emsize)
        self.embedding_decoder2 = nn.Embedding(ntokens, emsize)

        # RNN Encoder and Decoders
        self.encoder = nn.LSTM(input_size=emsize,
                               hidden_size=nhidden,
                               num_layers=nlayers,
                               dropout=dropout,
                               batch_first=True)

        # each decoder step sees its token embedding concatenated with the
        # nhidden-dimensional code
        decoder_input_size = emsize + nhidden
        self.decoder1 = nn.LSTM(input_size=decoder_input_size,
                                hidden_size=nhidden,
                                num_layers=1,
                                dropout=dropout,
                                batch_first=True)
        self.decoder2 = nn.LSTM(input_size=decoder_input_size,
                                hidden_size=nhidden,
                                num_layers=1,
                                dropout=dropout,
                                batch_first=True)

        # Initialize Linear Transformation
        self.linear = nn.Linear(nhidden, ntokens)

        self.init_weights()

        if share_decoder_emb:
            self.embedding_decoder2.weight = self.embedding_decoder1.weight

        self.softmax = nn.Softmax(dim=1)
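    # Construction sketch (hyperparameter values below are illustrative, not
    # taken from any original configuration):
    #
    #   model = Seq2Seq2Decoder(emsize=300, nhidden=300, ntokens=11000,
    #                           nlayers=1, noise_r=0.2,
    #                           share_decoder_emb=True, gpu=False)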
    def generate(self, whichdecoder, hidden, maxlen, sample=False, temp=1.0):
        """Generate through decoder; no backprop"""
        batch_size = hidden.size(0)

        if self.hidden_init:
            # initialize decoder hidden state to encoder output
            state = (hidden.unsqueeze(0), self.init_state(batch_size))
        else:
            state = self.init_hidden(batch_size)

        # <sos>
        self.start_symbols.data.resize_(batch_size, 1)
        self.start_symbols.data.fill_(1)
        self.start_symbols = to_gpu(self.gpu, self.start_symbols)

        if whichdecoder == 1:
            embedding = self.embedding_decoder1(self.start_symbols)
        else:
            embedding = self.embedding_decoder2(self.start_symbols)

        inputs = torch.cat([embedding, hidden.unsqueeze(1)], 2)

        # unroll
        all_indices = []
        all_vals = []
        for i in range(maxlen):
            if whichdecoder == 1:
                output, state = self.decoder1(inputs, state)
            else:
                output, state = self.decoder2(inputs, state)
            overvocab = self.linear(output.squeeze(1))

            if not sample:
                # greedy decoding: most probable token at each step
                vals, indices = torch.max(self.softmax(overvocab), 1)
                indices = indices.unsqueeze(1)
            else:
                # sample from the temperature-scaled distribution
                probs = F.softmax(overvocab / temp, dim=1)
                indices = torch.multinomial(probs, 1)
                vals = probs.gather(1, indices).squeeze(1)

            # keep per-step probabilities as tensors; .item() would only
            # work for batch_size == 1
            all_vals.append(vals)
            all_indices.append(indices)

            if whichdecoder == 1:
                embedding = self.embedding_decoder1(indices)
            else:
                embedding = self.embedding_decoder2(indices)
            inputs = torch.cat([embedding, hidden.unsqueeze(1)], 2)

        max_indices = torch.cat(all_indices, 1)

        return max_indices, all_vals
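    # Generation sketch (hypothetical `model` and `hidden`; `hidden` is a
    # (batch_size, nhidden) code from encode() or from a generator network):
    #
    #   ids, vals = model.generate(whichdecoder=1, hidden=hidden,
    #                              maxlen=20, sample=True, temp=0.8)
    #   # ids: (batch_size, maxlen) LongTensor of generated token ids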
    def init_state(self, bsz):
        zeros = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
        return to_gpu(self.gpu, zeros)
    def init_hidden(self, bsz):
        zeros1 = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
        zeros2 = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
        return (to_gpu(self.gpu, zeros1), to_gpu(self.gpu, zeros2))
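
# Minimal smoke test, a sketch under stated assumptions: init_weights() is
# referenced in __init__ but defined elsewhere in the repo, so a no-op
# stand-in is patched in; all sizes and token ids below are illustrative.
if __name__ == "__main__":
    if not hasattr(Seq2Seq2Decoder, "init_weights"):
        # hypothetical stand-in for the repo's weight initializer
        Seq2Seq2Decoder.init_weights = lambda self: None
    model = Seq2Seq2Decoder(emsize=16, nhidden=32, ntokens=50, nlayers=1)
    # toy batch: two sequences padded to length 5, sorted by decreasing length
    source = torch.LongTensor([[2, 5, 7, 3, 4], [4, 6, 3, 0, 0]])
    hidden = model.encode(source, lengths=[5, 3], noise=True)
    ids, vals = model.generate(whichdecoder=1, hidden=hidden, maxlen=8,
                               sample=False)
    print(hidden.size(), ids.size())  # expected: (2, 32) and (2, 8)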