def test_monokuma_no_cond(self):
        """Smoke-test two training steps of an unconditional Monokuma.

        The model is built with conditioning, bidirectional encoding, the
        bridge and attention all switched off, and is fed two consecutive
        row slices of ``self.tgt`` with no source.  Each step checks that
        no attention is returned, computes the cross-entropy against the
        targets shifted down by one row, back-propagates, clips the
        gradient norm to 0.25 and applies a manual update
        ``p <- p - 10 * grad`` to every parameter.
        """
        mono = model.Monokuma(self.vocab_size,
                              self.dim,
                              self.num_layers,
                              self.dropout,
                              is_conditional=False,
                              bidirectional_encoder=False,
                              use_bridge=False,
                              use_attention=False)
        # Targets equal to index 0 do not contribute to the loss.
        avgCE = nn.CrossEntropyLoss(ignore_index=0)
        mono.train()

        # Step 1: rows 0-1 are the input, rows 1-2 (flattened) the targets.
        subblock = self.tgt[0:2, :]
        golds = self.tgt[1:3, :].view(-1)
        mono.zero_grad()
        output, attns = mono(subblock, src=None, lengths=None, start=True)
        # Identity check: attention must be absent, not merely "equal" to None.
        self.assertIsNone(attns)
        loss = avgCE(output, golds)
        loss.backward()

        nn.utils.clip_grad_norm_(mono.parameters(), 0.25)
        for p in mono.parameters():
            # Keyword ``alpha`` form; the positional ``add_(alpha, tensor)``
            # overload is deprecated.
            p.data.add_(p.grad.data, alpha=-10)

        # Step 2: row 2 is the input, the remaining rows are the targets.
        # NOTE(review): presumably start=False continues from the state left
        # by the previous call -- confirm against Monokuma.forward.
        # NOTE(review): gradients are not zeroed again here, so this backward
        # pass accumulates onto the step-1 gradients -- confirm intended.
        subblock = self.tgt[2:3, :]
        golds = self.tgt[3:, :].view(-1)
        output, attns = mono(subblock, src=None, lengths=None, start=False)
        self.assertIsNone(attns)
        loss = avgCE(output, golds)
        loss.backward()

        nn.utils.clip_grad_norm_(mono.parameters(), 0.25)
        for p in mono.parameters():
            p.data.add_(p.grad.data, alpha=-10)
Esempio n. 2
0
 def test_epoch_continuous_data(self):
     """Smoke-test ``Junko.epoch_continuous_data`` on ``self.dat_cont``.

     Builds an unconditional Monokuma (no bridge, no attention) sized to
     the dataset's ``i2w`` table, wraps it in a Junko controller and calls
     the epoch routine once.  There are no explicit assertions; the test
     passes when the call completes and returns three values.
     """
     vocab_size = len(self.dat_cont.i2w)
     switches = dict(is_conditional=False,
                     bidirectional_encoder=False,
                     use_bridge=False,
                     use_attention=False)
     network = model.Monokuma(vocab_size,
                              self.dim,
                              self.num_layers,
                              self.dropout,
                              **switches)
     trainer = control.Junko(network, self.lr, self.bptt, self.interval)

     # The second argument is passed as 0 (presumably the epoch index --
     # confirm against Junko).  Unpacking also checks the return arity.
     result = trainer.epoch_continuous_data(self.dat_cont, 0)
     val_loss, _, epoch_time = result
Esempio n. 3
0
 def test_epoch_translation_data_simple(self):
     """Smoke-test ``Junko.epoch_translation_data`` on ``self.dat_tran``.

     Builds a conditional Monokuma with a unidirectional encoder, no
     bridge and no attention, sized to the dataset's ``i2w`` table, then
     calls the epoch routine once with shuffling disabled.  There are no
     explicit assertions; the test passes when the call completes and
     returns three values.
     """
     vocab_size = len(self.dat_tran.i2w)
     switches = dict(is_conditional=True,
                     bidirectional_encoder=False,
                     use_bridge=False,
                     use_attention=False)
     network = model.Monokuma(vocab_size,
                              self.dim,
                              self.num_layers,
                              self.dropout,
                              **switches)
     trainer = control.Junko(network, self.lr, self.bptt, self.interval)

     # The second argument is passed as 0 (presumably the epoch index --
     # confirm against Junko).  Unpacking also checks the return arity.
     result = trainer.epoch_translation_data(self.dat_tran, 0, shuffle=False)
     val_loss, _, epoch_time = result
    def test_monokuma_attn(self):
        """Smoke-test two training steps of a conditional Monokuma with attention.

        The model is built with conditioning and attention enabled
        (unidirectional encoder, no bridge) and is fed two consecutive row
        slices of ``self.tgt`` together with ``self.src`` and
        ``self.lengths``.  Each step computes the cross-entropy against the
        targets shifted down by one row, back-propagates, clips the gradient
        norm to 0.25 and applies a manual update ``p <- p - 10 * grad`` to
        every parameter.  The returned attention is not inspected.
        """
        mono = model.Monokuma(self.vocab_size,
                              self.dim,
                              self.num_layers,
                              self.dropout,
                              is_conditional=True,
                              bidirectional_encoder=False,
                              use_bridge=False,
                              use_attention=True)
        # Targets equal to index 0 do not contribute to the loss.
        avgCE = nn.CrossEntropyLoss(ignore_index=0)
        mono.train()

        # Step 1.
        #
        #  src
        # 1 6 9
        # 2 7 10
        # 3 8 0
        # 4 0 0
        # 5 0 0
        #
        #  subblock                  golds
        # 9   3   1
        # 8   2   100            8   2   100
        #                        7   100 0
        subblock = self.tgt[0:2, :]
        golds = self.tgt[1:3, :].view(-1)
        mono.zero_grad()
        output, _ = mono(subblock,
                         src=self.src,
                         lengths=self.lengths,
                         start=True)
        loss = avgCE(output, golds)
        loss.backward()

        nn.utils.clip_grad_norm_(mono.parameters(), 0.25)
        for p in mono.parameters():
            # Keyword ``alpha`` form; the positional ``add_(alpha, tensor)``
            # overload is deprecated.
            p.data.add_(p.grad.data, alpha=-10)

        # Step 2 (same src as above).
        #
        #  subblock                  golds
        # 7   100 0              100 0   0
        #
        # NOTE(review): presumably start=False continues from the state left
        # by the previous call -- confirm against Monokuma.forward.
        # NOTE(review): gradients are not zeroed again here, so this backward
        # pass accumulates onto the step-1 gradients -- confirm intended.
        subblock = self.tgt[2:3, :]
        golds = self.tgt[3:, :].view(-1)
        output, _ = mono(subblock,
                         src=self.src,
                         lengths=self.lengths,
                         start=False)
        loss = avgCE(output, golds)
        loss.backward()

        nn.utils.clip_grad_norm_(mono.parameters(), 0.25)
        for p in mono.parameters():
            p.data.add_(p.grad.data, alpha=-10)