Beispiel #1
0
    def _eval_autoencoder(self, batch, name='AE_eval'):
        """Run the autoencoder on one batch and log reconstruction metrics.

        Embeds the source ids, encodes them, draws a noisy code from the
        regularizer, decodes it, and records reconstruction loss/accuracy,
        the cosine similarity between the clean and noisy codes, and both
        code embeddings under *name* in ``self.result``.

        Args:
            batch: batch object exposing ``enc_src.id``, ``enc_src.len``
                and ``dec_tar.id`` (project type).
            name: tag under which the results are written.
        """
        # NOTE(review): train mode is enabled even though this is an eval
        # pass — presumably so encoder noise / regularizer variance stays
        # active during evaluation; confirm this is intentional.
        self.net.set_modules_train_mode(True)

        # Build graph
        embed = self.net.embed(batch.enc_src.id)
        code = self.net.enc(embed, batch.enc_src.len)
        code_var = self.net.reg.with_var(code)  # code with sampled noise/variance
        cos_sim = F.cosine_similarity(code, code_var, dim=1).mean()
        decoded = self.net.dec(code_var)

        code_embed = ResultWriter.Embedding(
            embed=code.data, text=decoded.get_text_batch())
        code_var_embed = ResultWriter.Embedding(
            embed=code_var.data, text=decoded.get_text_batch())

        # Compute word prediction loss and accuracy
        masked_output, masked_target = \
            mask_output_target(decoded.prob, batch.dec_tar.id, self.cfg.vocab_size_w)
        loss = self.net.dec.criterion_nll(masked_output, masked_target)
        _, max_ids = torch.max(masked_output, 1)
        acc = torch.mean(max_ids.eq(masked_target).float())

        self.result.add(name, odict(
            code=code_embed,
            code_var=code_var_embed,
            # .item() replaces the deprecated `tensor.data[0]`, which raises
            # IndexError on 0-dim tensors in PyTorch >= 0.5 (and matches the
            # style used by the other eval routine in this file).
            loss=loss.item(),
            acc=acc.item(),
            cosim=cos_sim.item(),
            var=self.net.reg.var,
            noise=self.net.enc.noise_radius,
            text=decoded.get_text_with_target(batch.enc_src.id),
            ))
Beispiel #2
0
    def _generate_text(self, name="Generated"):
        """Sample fake and interpolated codes, decode them with both word
        decoders, and log embeddings plus generated text under *name*."""
        self.net.set_modules_train_mode(True)

        with torch.no_grad():
            # Draw noise (kept for RNG parity with earlier experiments),
            # generator samples, and codes from interpolated latent points.
            size = (self.cfg.eval_size, self.cfg.hidden_size_w)
            noise = self.net.dec.make_noise_size_of(size)
            code_fake = self.net.gen.for_eval()
            z_batch = self._get_interpolated_z(100)
            code_interp = self.net.gen(z_batch)

            # Decode the fake codes with both decoders, and the
            # interpolated codes with the second decoder only.
            dec_fake = self.net.dec.tester(code_fake, max_len=self.cfg.max_len)
            dec_fake2 = self.net.dec2.tester(code_fake, max_len=self.cfg.max_len)
            dec_interp = self.net.dec2.tester(code_interp, max_len=self.cfg.max_len)

        # Package code embeddings together with their decoded text.
        embed_fake = ResultWriter.Embedding(
            embed=code_fake.data,
            text=dec_fake.get_text_batch(),
            tag='code_embed')

        embed_interp = ResultWriter.Embedding(
            embed=code_interp.data,
            text=dec_interp.get_text_batch(),
            tag='code_embed')

        embed_fake2 = ResultWriter.Embedding(
            embed=code_fake.data,
            text=dec_fake2.get_text_batch(),
            tag='code_embed')

        self.result.add(name, odict(
            embed_fake=embed_fake,
            embed_interpolated=embed_interp,
            embed_fake2=embed_fake2,
            txt_word1=dec_fake.get_text(),
            txt_word2=dec_fake2.get_text(),
        ))
Beispiel #3
0
    def _generate_text(self, name="Generated"):
        """Decode generator samples, log them under *name*, and score the
        generated sentences against the held-out test set."""
        self.net.set_modules_train_mode(True)

        # Sample codes from the generator and decode them to text.
        fake_code = self.net.gen.for_eval()
        dec_out = self.net.dec(fake_code)

        fake_embed = ResultWriter.Embedding(
            embed=fake_code.data, text=dec_out.get_text_batch())

        self.result.add(name, odict(
            code=fake_embed,
            text=dec_out.get_text(),
            ))

        # Evaluation
        scores = evaluate_sents(self.test_sents, dec_out.get_text())
        self.result.add("Evaluation", scores)
Beispiel #4
0
    def _generate_text2(self, name="Generated"):
        """Decode pure noise through the word decoder, log the result under
        *name*, and score the generated sentences against the test set."""
        self.net.set_modules_train_mode(True)

        # Draw noise shaped like a batch of codes and decode it.
        shape = (self.cfg.eval_size, self.cfg.hidden_size_w)
        z = self.net.dec.make_noise_size_of(shape)
        dec_out = self.net.dec.tester(z, max_len=self.cfg.max_len)

        noise_embed = ResultWriter.Embedding(
            embed=z.data,
            text=dec_out.get_text_batch(),
            tag='code_embed')

        self.result.add(name, odict(
            embed=noise_embed,
            txt_word=dec_out.get_text(),
        ))

        # Evaluation
        scores = evaluate_sents(self.test_sents, dec_out.get_text())
        self.result.add("Evaluation", scores)
Beispiel #5
0
    def _eval_autoencoder(self, batch, name='AE_eval'):
        """Evaluate the autoencoder on one batch, with repeated noisy samples.

        Encodes the batch once, decodes the variance-free code, then draws
        ``n_vars`` noisy codes from the regularizer and decodes each. Logs
        reconstruction loss/accuracy, the clean and noisy code embeddings,
        and paired clean/noisy text under *name* in ``self.result``.
        """
        #name += ('/' + decode_mode)
        n_vars = 10  # number of noisy re-samples of the code
        assert n_vars > 0  # the post-loop code relies on `code_`/`decoded_` existing
        code_list = list()
        decoded_list = list()

        self.net.set_modules_train_mode(False)

        with torch.no_grad():
            # Build graph
            embed = self.net.embed_w(batch.enc_src.id)
            #code = self.net.enc.with_noise(embed, batch.enc_src.len)
            enc_h = self.net.enc(embed, batch.enc_src.len)
            # Deterministic (variance-free) code for the reconstruction metrics.
            code = self.net.reg.without_var(enc_h)
            decoded = self.net.dec(code, max_len=self.cfg.max_len)
            #code = self.net.reg.without_var(enc_h)
            for _ in range(n_vars):
                #code_var = self.net.reg.with_var(code)
                # noise, _, _ = self.net.rev(code_)
                # code_r = self.net.gen(noise)
                #code_ = self._add_noise_to(code, 1.0)
                # Fresh noisy sample of the code each iteration.
                code_ = self.net.reg.with_var(enc_h)
                code_list.append(code_)
                decoded_ = self.net.dec(code_, max_len=max(batch.enc_src.len))
                decoded_list.append(decoded_)
            # NOTE: `code_` and `decoded_` intentionally leak out of the loop;
            # the last noisy sample is reused below for logging.

            # noise, _, _ = self.net.rev(code)
            # code_gen = self.net.gen(noise)

            #code_var = self.net.reg.with_var(code)
            #cos_sim = F.cosine_similarity(code, code_var, dim=1).mean()
            assert len(code_list) > 0
        # Debug: inspect the regularizer's learned sigma.
        log.info(self.net.reg.sigma.mean(1))
        log.info(self.net.reg.sigma[0])
        # Compute word prediction loss and accuracy
        bsz = self.cfg.batch_size
        maxlen = max(batch.enc_src.len)
        #tar = batch.enc_src.id[:bsz].veiw(bsz, )
        # NOTE(review): assumes dec_tar.id is flat (batch*time) — confirm
        # against the batch builder.
        target = batch.dec_tar.id[:bsz*maxlen] # rnn
        #target = batch.enc_src.id[:bsz].view(-1) # cnn
        loss_recon, acc = self._recon_loss_and_acc_for_rnn(
            decoded.prob[:bsz], target, len(self.net.vocab_w))
        #loss_var = 1 / torch.mean(self.net.reg.var)
        #loss_kl = self._compute_kl_div_loss(self.net.reg.mu, self.net.reg.sigma)

        # Last noisy code, paired with the clean decoding's text.
        embed = ResultWriter.Embedding(
            embed=code_.data,
            text=decoded.get_text_batch(),
            tag='code_embed')

        # embed_gen = ResultWriter.Embedding(
        #     embed=code_gen.data,
        #     text=decoded.get_text_batch(),
        #     tag='code_embed')

        # One embedding entry per noisy sample, keyed noise_0..noise_{n-1}.
        embeds_r = odict()
        for i in range(n_vars):
            embed_r = ResultWriter.Embedding(
                embed=code_list[i].data,
                text=decoded_list[i].get_text_batch(),
                tag='code_embed2')
            embeds_r.update({('noise_%d' % i): embed_r})

        result_dict = odict(
            loss_recon=loss_recon.item(),
            #loss_var=loss_var.item(),
            #loss_kl=loss_kl.item(),
            acc=acc.item(),
            real=embed,
            #embed_gen=embed_gen,
            #embed_recon=embed_r,
            # cosim=cos_sim.item(),
            noise=self.net.enc.noise_radius,
            text_real=decoded.get_text_with_pair(batch.enc_src.id),
            text_noisy=decoded_.get_text_with_pair(batch.enc_src.id),
        )
        result_dict.update(embeds_r)
        self.result.add(name, result_dict)