def test_monokuma_no_cond(self):
    """Run two training steps on an unconditional Monokuma model.

    The model is built with ``is_conditional=False`` and attention
    disabled, so every forward pass is expected to return ``None`` in
    place of attention weights.  The first step feeds target rows 0-1
    with ``start=True``; the second feeds row 2 with ``start=False``
    (presumably continuing from state carried over from the first call
    -- confirm against ``Monokuma.forward``).

    Gradients are zeroed only once, before the first step, so the
    second parameter update is applied to accumulated gradients.
    """
    mono = model.Monokuma(self.vocab_size, self.dim, self.num_layers,
                          self.dropout, is_conditional=False,
                          bidirectional_encoder=False, use_bridge=False,
                          use_attention=False)
    avgCE = nn.CrossEntropyLoss(ignore_index=0)  # class 0 is excluded from the loss
    mono.train()
    mono.zero_grad()

    # (input rows, gold rows shifted down by one, start flag)
    steps = (
        (self.tgt[0:2, :], self.tgt[1:3, :], True),
        (self.tgt[2:3, :], self.tgt[3:, :], False),
    )
    for subblock, gold_rows, start in steps:
        golds = gold_rows.view(-1)
        output, attns = mono(subblock, src=None, lengths=None, start=start)
        self.assertIsNone(attns)

        loss = avgCE(output, golds)
        loss.backward()
        nn.utils.clip_grad_norm_(mono.parameters(), 0.25)
        for p in mono.parameters():
            # p <- p - 10 * grad.  The keyword form replaces the deprecated
            # add_(Number, Tensor) overload.
            p.data.add_(p.grad.data, alpha=-10)
def test_epoch_continuous_data(self):
    """Smoke-test ``Junko.epoch_continuous_data`` with an unconditional model.

    The vocabulary size is taken from the continuous dataset's ``i2w``
    table.  There are no explicit assertions: the test passes when the
    call completes and its result unpacks into exactly three values.
    """
    vocab_size = len(self.dat_cont.i2w)
    language_model = model.Monokuma(
        vocab_size,
        self.dim,
        self.num_layers,
        self.dropout,
        is_conditional=False,
        bidirectional_encoder=False,
        use_bridge=False,
        use_attention=False,
    )
    trainer = control.Junko(language_model, self.lr, self.bptt, self.interval)

    # Second argument is 0 -- presumably the epoch index; confirm against
    # Junko.epoch_continuous_data.
    result = trainer.epoch_continuous_data(self.dat_cont, 0)
    val_loss, _, epoch_time = result
def test_epoch_translation_data_simple(self):
    """Smoke-test ``Junko.epoch_translation_data`` with a plain conditional model.

    The model is conditional but has no bidirectional encoder, bridge or
    attention.  The vocabulary size is taken from the translation
    dataset's ``i2w`` table.  There are no explicit assertions: the test
    passes when the call completes and its result unpacks into exactly
    three values.
    """
    vocab_size = len(self.dat_tran.i2w)
    translation_model = model.Monokuma(
        vocab_size,
        self.dim,
        self.num_layers,
        self.dropout,
        is_conditional=True,
        bidirectional_encoder=False,
        use_bridge=False,
        use_attention=False,
    )
    trainer = control.Junko(translation_model, self.lr, self.bptt,
                            self.interval)

    # Second argument is 0 -- presumably the epoch index; confirm against
    # Junko.epoch_translation_data.  Shuffling is explicitly turned off.
    result = trainer.epoch_translation_data(self.dat_tran, 0, shuffle=False)
    val_loss, _, epoch_time = result
def test_monokuma_attn(self):
    """Run two training steps on a conditional Monokuma model with attention.

    Both steps condition on the same ``self.src`` / ``self.lengths``.
    The first feeds target rows 0-1 with ``start=True``; the second
    feeds row 2 with ``start=False`` (presumably continuing from state
    carried over from the first call -- confirm against
    ``Monokuma.forward``).  There are no explicit assertions; the test
    passes when both forward/backward/update cycles complete.

    Gradients are zeroed only once, before the first step, so the
    second parameter update is applied to accumulated gradients.
    """
    mono = model.Monokuma(self.vocab_size, self.dim, self.num_layers,
                          self.dropout, is_conditional=True,
                          bidirectional_encoder=False, use_bridge=False,
                          use_attention=True)
    avgCE = nn.CrossEntropyLoss(ignore_index=0)  # class 0 is excluded from the loss
    mono.train()

    def train_step(subblock, golds, start):
        # One forward/backward pass followed by a clipped, hand-rolled update.
        output, _ = mono(subblock, src=self.src, lengths=self.lengths,
                         start=start)
        loss = avgCE(output, golds)
        loss.backward()
        nn.utils.clip_grad_norm_(mono.parameters(), 0.25)
        for p in mono.parameters():
            # p <- p - 10 * grad.  The keyword form replaces the deprecated
            # add_(Number, Tensor) overload.
            p.data.add_(p.grad.data, alpha=-10)

    # Fixture layout as recorded in the original comments (the fixture
    # itself is defined outside this method):
    #
    # src
    # 1 6 9
    # 2 7 10
    # 3 8 0
    # 4 0 0
    # 5 0 0
    #
    # subblock        golds
    # 9 3 1           8 2 100
    # 8 2 100         7 100 0
    mono.zero_grad()
    train_step(self.tgt[0:2, :], self.tgt[1:3, :].view(-1), True)

    # Same src as above.
    #
    # subblock        golds
    # 7 100 0         100 0 0
    train_step(self.tgt[2:3, :], self.tgt[3:, :].view(-1), False)