def test_dropout_WITH_PROB_ZERO(self):
    """With ``dropout_p=0`` two forward passes must give identical outputs."""
    decoder = DecoderRNN(self.vocab_size, 50, 16, 0, 1, dropout_p=0)
    # Re-draw every parameter from uniform_(-1, 1) before running the model.
    for weight in decoder.parameters():
        weight.data.uniform_(-1, 1)

    first_pass, _, _ = decoder()
    second_pass, _, _ = decoder()

    # Compare the two runs element by element, step by step.
    for step_a, step_b in zip(first_pass, second_pass):
        self.assertTrue(torch.equal(step_a.data, step_b.data))
def test_dropout_WITH_NON_ZERO_PROB(self):
    """With ``dropout_p=0.5`` repeated forward passes must not always agree.

    Up to 50 pairs of forward passes are run; the test passes as soon as one
    pair differs in its first output step.
    """
    decoder = DecoderRNN(self.vocab_size, 50, 16, 0, 1,
                         n_layers=2, dropout_p=0.5)
    for weight in decoder.parameters():
        weight.data.uniform_(-1, 1)

    def first_steps_match():
        # One trial: two independent forward passes, compared on step 0 only.
        first_pass, _, _ = decoder()
        second_pass, _, _ = decoder()
        return torch.equal(first_pass[0].data, second_pass[0].data)

    # all() stops at the first mismatching pair, so no extra passes are run.
    always_equal = all(first_steps_match() for _ in range(50))
    self.assertFalse(always_equal)
def test_k_1(self):
    """Check that a top-k decoder with k=1 reproduces the plain decoder."""
    eos = 1
    batch_size = 1

    # Weights are random, so the comparison is repeated several times.
    for _ in range(10):
        decoder = DecoderRNN(self.vocab_size, 50, 16, 0, eos)
        for weight in decoder.parameters():
            weight.data.uniform_(-1, 1)
        topk_decoder = TopKDecoder(decoder, 1)

        output, _, other = decoder()
        output_topk, _, other_topk = topk_decoder()
        self.assertEqual(len(output), len(output_topk))

        done = [False for _ in range(batch_size)]
        running_scores = [0 for _ in range(batch_size)]

        for step, step_output in enumerate(output):
            best_score, _ = step_output.topk(1)
            step_symbols = other['sequence'][step]

            for idx in range(batch_size):
                running_scores[idx] += best_score[idx].data[0]
                symbol = step_symbols[idx].data[0]

                if not done[idx] and symbol == eos:
                    # First EOS for this sequence: length and accumulated
                    # score must match what the top-k decoder reported.
                    done[idx] = True
                    self.assertEqual(other_topk['length'][idx], step + 1)
                    self.assertTrue(
                        np.isclose(running_scores[idx],
                                   other_topk['score'][idx][0]))

                if done[idx]:
                    # Finished sequences (including the EOS step itself)
                    # are not compared symbol by symbol.
                    continue

                symbol_topk = other_topk['topk_sequence'][step][idx].data[0][0]
                self.assertEqual(symbol, symbol_topk)
                self.assertTrue(
                    torch.equal(step_output.data, output_topk[step].data))

            if all(done):
                break
def prepare_model(opt, vocab_size, tgt):
    """Build the encoder/decoder pair and wrap it in a ``Seq2seq`` model.

    Args:
        opt: Options object. The attributes read here are ``hidden_size``,
            ``n_layers``, ``bidirectional``, ``max_len``, ``dropout`` and
            ``device``.
        vocab_size: Vocabulary size passed to both encoder and decoder.
        tgt: Target field supplying ``eos_id`` and ``sos_id``.

    Returns:
        A 7-tuple ``(seq2seq, gen, opt_gen, rnn_clf, opt_dis_clf, dis_gen,
        opt_dis_gen)``. Every element after ``seq2seq`` is currently the
        integer placeholder ``1`` because the generator/discriminator setup
        is disabled.
    """
    # Only referenced by the disabled generator/discriminator setup below.
    dis_hidden_size = opt.hidden_size * opt.n_layers * (2 if opt.bidirectional else 1)

    # Build the sequence-to-sequence model.
    encoder = EncoderRNN(
        vocab_size,
        opt.max_len,
        opt.hidden_size,
        bidirectional=opt.bidirectional,
        n_layers=opt.n_layers,
        variable_lengths=True,
    )

    # The decoder hidden size is doubled when the encoder is bidirectional.
    if opt.bidirectional:
        decoder_hidden_size = opt.hidden_size * 2
    else:
        decoder_hidden_size = opt.hidden_size
    decoder = DecoderRNN(
        vocab_size,
        opt.max_len,
        decoder_hidden_size,
        dropout_p=opt.dropout,
        n_layers=opt.n_layers,
        use_attention=False,
        bidirectional=opt.bidirectional,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id,
    )

    seq2seq = Seq2seq(encoder, decoder).to(opt.device)

    # Disabled generator / discriminator setup, kept for reference:
    # gen = Generator(dis_hidden_size, opt.z_size).to(opt.device)
    # encoder_new = EncoderRNN(vocab_size, opt.max_len, opt.hidden_size,
    #                          bidirectional=opt.bidirectional, n_layers=opt.n_layers,
    #                          variable_lengths=True).to(opt.device)
    #
    # dis_clf = Discriminator(dis_hidden_size, opt.clf_layers).to(opt.device)
    # rnn_clf = RNNclaissfier(encoder_new, dis_clf).to(opt.device)
    #
    # dis_gen = Discriminator(dis_hidden_size, opt.clf_layers).to(opt.device)
    # opt_gen = optim.Adam(gen.parameters(), lr=opt.gen_lr)
    # opt_dis_clf = optim.Adam(rnn_clf.parameters(), lr=opt.dis_dec_lr)
    # opt_dis_gen = optim.Adam(dis_gen.parameters(), lr=opt.dis_gen_lr)

    # Placeholders that keep the 7-tuple return shape intact.
    gen = opt_gen = rnn_clf = opt_dis_clf = dis_gen = opt_dis_gen = 1

    return seq2seq, gen, opt_gen, rnn_clf, opt_dis_clf, dis_gen, opt_dis_gen
def setUpClass(self):
    """Create a ``Predictor`` around a freshly built LSTM seq2seq model.

    The source and target vocabularies are built from ``data/eng-fra.txt``
    next to this test file, and the predictor is stored as ``predictor``.
    """
    # NOTE(review): unittest calls setUpClass on the class, which requires a
    # @classmethod decorator; none is visible in this chunk -- confirm it
    # exists above this definition.
    here = os.path.dirname(os.path.realpath(__file__))
    data_file = os.path.join(here, 'data/eng-fra.txt')

    src = SourceField()
    trg = TargetField()
    dataset = torchtext.data.TabularDataset(
        path=data_file,
        format='tsv',
        fields=[('src', src), ('trg', trg)],
    )
    for field in (src, trg):
        field.build_vocab(dataset)

    encoder = EncoderRNN(len(src.vocab), 10, 10, rnn_cell='lstm')
    decoder = DecoderRNN(len(trg.vocab), 10, 10,
                         trg.sos_id, trg.eos_id, rnn_cell='lstm')
    model = Seq2seq(encoder, decoder)
    self.predictor = Predictor(model, src.vocab, trg.vocab)
def setUp(self):
    """Load the TSV fixture and build a small LSTM seq2seq model.

    Stores the dataset as ``self.dataset`` and the model as ``self.seq2seq``;
    every model parameter is re-drawn with ``uniform_(-0.08, 0.08)``.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    data_file = os.path.join(here, 'data/eng-fra.txt')

    src = SourceField()
    tgt = TargetField()
    self.dataset = torchtext.data.TabularDataset(
        path=data_file,
        format='tsv',
        fields=[('src', src), ('tgt', tgt)],
    )
    for field in (src, tgt):
        field.build_vocab(self.dataset)

    encoder = EncoderRNN(len(src.vocab), 10, 10, rnn_cell='lstm')
    decoder = DecoderRNN(len(tgt.vocab), 10, 10,
                         tgt.sos_id, tgt.eos_id, rnn_cell='lstm')
    self.seq2seq = Seq2seq(encoder, decoder)

    for weight in self.seq2seq.parameters():
        weight.data.uniform_(-0.08, 0.08)
# Model construction for a fresh (non-resumed) run. Both names start as None;
# `optimizer` is not assigned again anywhere in the visible lines.
seq2seq = None
optimizer = None
if not opt.resume:
    # Initialize model
    hidden_size = 128
    bidirectional = False
    # NOTE(review): latent_size is not referenced in the visible lines --
    # confirm whether later code reads it.
    latent_size = 128
    item_encoder = ItemEncoder(len(input_vocab), hidden_size=hidden_size,
                               predic_rate=True)
    # The decoder hidden size is doubled only when `bidirectional` is set.
    decoder = DecoderRNN(len(tgt.vocab), max_len,
                         hidden_size * 2 if bidirectional else hidden_size,
                         dropout_p=0.2, use_attention=False,
                         bidirectional=bidirectional,
                         eos_id=tgt.eos_id, sos_id=tgt.sos_id)
    # Disabled alternative decoder, kept for reference:
    # decoder = ContextDecoderRNN(
    #     len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
    #     dropout_p=0.2, use_attention=False, bidirectional=bidirectional,
    #     eos_id=tgt.eos_id, sos_id=tgt.sos_id, use_gC2S=True)
    seq2seq = Seq2seq(item_encoder, decoder)
    if torch.cuda.is_available():
        seq2seq.cuda()
    # Re-draw only the parameters that have requires_grad set.
    for param in seq2seq.parameters():
        if param.requires_grad:
            param.data.uniform_(-0.08, 0.08)
# Build the item-encoder/decoder model and start configuring the trainer.
hidden_size = 128
bidirectional = False
item_encoder = ItemEncoder(rating_count=10, item_count=10,
                           hidden_size=hidden_size)
# NOTE(review): `encoder` is built here but is not passed to Seq2seq in the
# visible lines (item_encoder is used instead) -- confirm whether it is
# still needed.
encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                     bidirectional=bidirectional, rnn_cell='lstm',
                     variable_lengths=True)
# NOTE(review): the decoder is given hidden_size * 2 although `bidirectional`
# is False here -- confirm this matches the output size of ItemEncoder.
decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2,
                     dropout_p=0.2, use_attention=False,
                     bidirectional=bidirectional, rnn_cell='lstm',
                     eos_id=tgt.eos_id, sos_id=tgt.sos_id)
seq2seq = Seq2seq(item_encoder, decoder)
if torch.cuda.is_available():
    seq2seq.cuda()
# Re-draw every model parameter with uniform_(-0.08, 0.08).
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)
# train
# NOTE(review): the trainer call below is cut off in this view; its
# remaining arguments are not shown.
t = VAESupervisedTrainer(loss=loss, batch_size=32, checkpoint_every=50,
def test_k_greater_then_1(self):
    """ Implement beam search manually and compare results from topk decoder.

    For each of 10 randomly initialized decoders, a reference beam search is
    run step by step with ``decoder.forward_step``. The resulting sequences
    are then compared against the ``'score'``, ``'topk_length'`` and
    ``'topk_sequence'`` entries returned by ``TopKDecoder``.
    """
    max_len = 50
    beam_size = 3
    batch_size = 1
    hidden_size = 8
    sos = 0
    eos = 1
    for _ in range(10):
        decoder = DecoderRNN(self.vocab_size, max_len, hidden_size, sos, eos)
        for param in decoder.parameters():
            param.data.uniform_(-1, 1)
        topk_decoder = TopKDecoder(decoder, beam_size)
        # The same random initial hidden state feeds both the top-k decoder
        # and the manual search below.
        encoder_hidden = torch.autograd.Variable(
            torch.randn(1, batch_size, hidden_size))
        _, _, other_topk = topk_decoder(encoder_hidden=encoder_hidden)
        # Queue state:
        # 1. time step
        # 2. symbol
        # 3. hidden state
        # 4. accumulated log likelihood
        # 5. beam number
        #
        # The seed entry uses time step -1 and beam number None; the None is
        # what terminates the back-tracking loop further down.
        batch_queue = [[(-1, sos, encoder_hidden[:, b, :].unsqueeze(1), 0, None)]
                       for b in range(batch_size)]
        # time_batch_queue[0] is the seed queue; the queue produced at step t
        # is stored at index t + 1.
        time_batch_queue = [batch_queue]
        batch_finished_seqs = [list() for _ in range(batch_size)]
        for t in range(max_len):
            new_batch_queue = []
            for b in range(batch_size):
                new_queue = []
                for k in range(min(len(time_batch_queue[t][b]), beam_size)):
                    _, inputs, hidden, seq_score, _ = time_batch_queue[t][
                        b][k]
                    # A beam whose last symbol is EOS is recorded as finished
                    # and is not expanded any further.
                    if inputs == eos:
                        batch_finished_seqs[b].append(
                            time_batch_queue[t][b][k])
                        continue
                    inputs = torch.autograd.Variable(
                        torch.LongTensor([[inputs]]))
                    # F.log_softmax is passed as the output function --
                    # presumably so scores are log-probabilities; confirm
                    # against DecoderRNN.forward_step.
                    decoder_outputs, hidden, _ = decoder.forward_step(
                        inputs, hidden, None, F.log_softmax)
                    topk_score, topk = decoder_outputs[0].data.topk(
                        beam_size)
                    # Each candidate stores (t, k) so that its parent can be
                    # found later as time_batch_queue[t][b][k].
                    for score, sym in zip(topk_score.tolist()[0],
                                          topk.tolist()[0]):
                        new_queue.append(
                            (t, sym, hidden, score + seq_score, k))
                # Keep only the beam_size highest-scoring candidates.
                new_queue = sorted(new_queue, key=lambda x: x[3],
                                   reverse=True)[:beam_size]
                new_batch_queue.append(new_queue)
            time_batch_queue.append(new_batch_queue)
        # finished beams
        finalist = [l[:beam_size] for l in batch_finished_seqs]
        # unfinished beams
        # If fewer than beam_size beams hit EOS, fill up with the best
        # entries of the final step.
        for b in range(batch_size):
            if len(finalist[b]) < beam_size:
                last_step = sorted(time_batch_queue[-1][b],
                                   key=lambda x: x[3], reverse=True)
                finalist[b] += last_step[:beam_size - len(finalist[b])]
        # back track
        # Follow the (time step, beam number) links from each finalist back
        # to the seed entry, then reverse to get the sequence in order.
        topk = []
        for b in range(batch_size):
            batch_topk = []
            for k in range(beam_size):
                seq = [finalist[b][k]]
                prev_k = seq[-1][4]
                prev_t = seq[-1][0]
                while prev_k is not None:
                    seq.append(time_batch_queue[prev_t][b][prev_k])
                    prev_k = seq[-1][4]
                    prev_t = seq[-1][0]
                batch_topk.append([s for s in reversed(seq)])
            topk.append(batch_topk)
        # Order each batch's sequences by final accumulated score, best first.
        for b in range(batch_size):
            topk[b] = sorted(topk[b], key=lambda s: s[-1][3], reverse=True)
        topk_scores = other_topk['score']
        topk_lengths = other_topk['topk_length']
        topk_pred_symbols = other_topk['topk_sequence']
        for b in range(batch_size):
            # When two adjacent scores are np.isclose the checks are skipped
            # -- presumably because the ranking of near-tied beams is not
            # stable; confirm. Note the break leaves the whole batch loop.
            precision_error = False
            for k in range(beam_size - 1):
                if np.isclose(topk_scores[b][k], topk_scores[b][k + 1]):
                    precision_error = True
                    break
            if precision_error:
                break
            for k in range(beam_size):
                # The manual sequence carries the seed SOS entry, hence - 1.
                self.assertEqual(topk_lengths[b][k], len(topk[b][k]) - 1)
                self.assertTrue(
                    np.isclose(topk_scores[b][k], topk[b][k][-1][3]))
                total_steps = topk_lengths[b][k]
                for t in range(total_steps):
                    self.assertEqual(topk_pred_symbols[t][b, k].data[0],
                                     topk[b][k][t + 1][1])  # topk includes SOS
def test_init(self):
    """Smoke test: a ``TopKDecoder`` with k=3 can wrap a ``DecoderRNN``."""
    base_decoder = DecoderRNN(
        self.vocab_size, 50, 16, 0, 1,
        input_dropout_p=0,
    )
    # Construction alone is the check; the instance is not used afterwards.
    TopKDecoder(base_decoder, 3)