class Seq2Seq(BaseModel):
    """Encoder-decoder (seq2seq) model trained with a time-distributed softmax loss."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V, D, H = vocab_size, wordvec_size, hidden_size
        self.encoder = Encoder(V, D, H)
        self.decoder = Decoder(V, D, H)
        self.softmax = TimeSoftmaxWithLoss()
        # Flatten encoder + decoder parameters/gradients so an optimizer
        # can treat the whole model as a single list.
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads

    def forward(self, xs, ts):
        """Return the loss for source batch xs against target batch ts.

        Teacher forcing: the decoder consumes ts[:, :-1] and is scored
        against the one-step-shifted ts[:, 1:].
        """
        decoder_xs, decoder_ts = ts[:, :-1], ts[:, 1:]
        h = self.encoder.forward(xs)
        score = self.decoder.forward(decoder_xs, h)
        return self.softmax.forward(score, decoder_ts)

    def backward(self, dout=1):
        """Backpropagate through loss, decoder, then encoder; return encoder's input grad."""
        dscore = self.softmax.backward(dout)
        dh = self.decoder.backward(dscore)
        return self.encoder.backward(dh)

    def generate(self, xs, start_id, sample_size):
        """Encode xs and sample `sample_size` token ids, seeded with start_id."""
        h = self.encoder.forward(xs)
        return self.decoder.generate(h, start_id, sample_size)
class TestDecoder(unittest.TestCase):
    """Shape checks for Decoder.forward / backward / generate."""

    def setUp(self):
        vocab_size, wordvec_size, hidden_size = 13, 100, 100
        self.decoder = Decoder(vocab_size, wordvec_size, hidden_size)
        # (batch=13, time=100) token ids and (batch=13, hidden=100) encoder state.
        self.xs = np.random.randint(0, 13, (13, 100))
        self.h = np.random.randn(13, 100)

    def test_forward(self):
        # Scores come back as (batch, time, vocab).
        score = self.decoder.forward(self.xs, self.h)
        self.assertEqual((13, 100, 13), score.shape)

    def test_backward(self):
        score = self.decoder.forward(self.xs, self.h)
        dh = self.decoder.backward(score)
        # Gradient w.r.t. the hidden state matches its (batch, hidden) shape.
        self.assertEqual((13, 100), dh.shape)

    def test_generate(self):
        sampled = self.decoder.generate(np.random.randn(1, 100), 0, 10)
        self.assertEqual(10, len(sampled))
-- One training step: forward the decoder, compute perceptual losses,
-- backpropagate, and optionally also train on style-image reconstruction.
-- Fixes vs. original: broken `then` keywords (`.` / `:` after conditions),
-- missing `end` closers, and `.`-style calls (`criterion.backward(...)`,
-- `dec.forward(...)`, `net.get(2)`) changed to `:` method calls so `self`
-- is actually passed, as Torch nn modules require.

-- Set content target
criterion.content_layers[1]:setTarget(outputLatent)

-- Compute loss
output = dec:forward(outputLatent):clone() -- forward through decoder, generate transformed images
loss = criterion:forward(output)           -- forward through loss network, compute loss functions
contentLoss = criterion.contentLoss
styleLoss = criterion.styleLoss
tvLoss = 0
if opt.tvWeight > 0 then
    tvLoss = criterion.net:get(2).loss
end

-- Backpropagate gradients
decGrad = criterion:backward(output) -- backprop through loss network, compute gradients w.r.t. the transformed images
dec:backward(outputLatent, decGrad)  -- backprop gradients through decoder

-- Optionally train the decoder to reconstruct style images
styleReconLoss = 0
if opt.reconStyle then
    criterion:setContentTarget(styleInput)
    styleRecon = dec:forward(styleLatent):clone()
    styleReconLoss = criterion:forward(styleRecon)
    decGrad = criterion:backward(styleRecon)
    dec:backward(styleLatent, decGrad)
    loss = loss + styleReconLoss
end

table.insert(history, {optimState.iterCounter, loss, contentLoss, styleLoss, styleReconLoss})
maybe_print(loss, contentLoss, styleLoss, tvLoss, timer)
if opt.reconStyle then
    -- NOTE(review): the body of this branch continues beyond this chunk.