def _eval_autoencoder(self, batch, name='AE_eval'):
    """Run the autoencoder on one batch and log reconstruction metrics.

    Encodes the batch, perturbs the code via the regularizer
    (`reg.with_var`), decodes, and logs loss/accuracy/cosine-similarity
    plus code embeddings and sample text under `name`.

    NOTE(review): modules are put in train mode during evaluation —
    presumably to keep the encoder's noise active; confirm intent.

    Args:
        batch: project batch object with `enc_src` / `dec_tar` fields.
        name: result-writer section name.
    """
    self.net.set_modules_train_mode(True)

    # Build graph: embed -> encode -> add variance -> decode
    embed = self.net.embed(batch.enc_src.id)
    code = self.net.enc(embed, batch.enc_src.len)
    code_var = self.net.reg.with_var(code)
    # How much the perturbed code deviates from the clean one.
    cos_sim = F.cosine_similarity(code, code_var, dim=1).mean()
    decoded = self.net.dec(code_var)

    code_embed = ResultWriter.Embedding(
        embed=code.data,
        text=decoded.get_text_batch())
    code_var_embed = ResultWriter.Embedding(
        embed=code_var.data,
        text=decoded.get_text_batch())

    # Compute word prediction loss and accuracy over non-masked targets.
    masked_output, masked_target = \
        mask_output_target(decoded.prob, batch.dec_tar.id,
                           self.cfg.vocab_size_w)
    loss = self.net.dec.criterion_nll(masked_output, masked_target)
    _, max_ids = torch.max(masked_output, 1)
    acc = torch.mean(max_ids.eq(masked_target).float())

    # BUGFIX: `.data[0]` is deprecated and raises on 0-dim tensors in
    # modern PyTorch; use `.item()` (as the file's other
    # `_eval_autoencoder` already does).
    self.result.add(name, odict(
        code=code_embed,
        code_var=code_var_embed,
        loss=loss.item(),
        acc=acc.item(),
        cosim=cos_sim.item(),
        var=self.net.reg.var,
        noise=self.net.enc.noise_radius,
        text=decoded.get_text_with_target(batch.enc_src.id),
    ))
def _generate_text(self, name="Generated"):
    """Sample codes from the generator, decode them, and log the results.

    The same generated codes are decoded by both decoders (`dec` and
    `dec2`); codes generated from interpolated latent vectors are decoded
    by `dec2`. Embeddings and text samples are logged under `name`.

    NOTE(review): train mode is enabled even though this is
    generation-for-logging — presumably to keep noise/dropout active;
    confirm against the trainer's conventions.

    Args:
        name: result-writer section name.
    """
    self.net.set_modules_train_mode(True)

    with torch.no_grad():
        # Build graph.
        # (Removed: unused `noise`/`noise_size` locals that only fed the
        # long-dead commented-out `decoded0` path.)
        code_fake = self.net.gen.for_eval()
        zs = self._get_interpolated_z(100)
        code_interpolated = self.net.gen(zs)

        decoded1 = self.net.dec.tester(code_fake, max_len=self.cfg.max_len)
        decoded2 = self.net.dec2.tester(code_fake, max_len=self.cfg.max_len)
        decoded3 = self.net.dec2.tester(code_interpolated,
                                        max_len=self.cfg.max_len)

        code_embed = ResultWriter.Embedding(
            embed=code_fake.data,
            text=decoded1.get_text_batch(),
            tag='code_embed')
        code_embed_interpolated = ResultWriter.Embedding(
            embed=code_interpolated.data,
            text=decoded3.get_text_batch(),
            tag='code_embed')
        code_embed2 = ResultWriter.Embedding(
            embed=code_fake.data,
            text=decoded2.get_text_batch(),
            tag='code_embed')

        self.result.add(name, odict(
            embed_fake=code_embed,
            embed_interpolated=code_embed_interpolated,
            embed_fake2=code_embed2,
            txt_word1=decoded1.get_text(),
            txt_word2=decoded2.get_text(),
        ))
def _generate_text(self, name="Generated"):
    """Decode generator samples to text, log them, and score the output.

    Samples codes via `gen.for_eval()`, decodes them, logs the code
    embedding and text under `name`, then scores the generated sentences
    against `self.test_sents` with `evaluate_sents` (external metric
    helper) and logs the scores under "Evaluation".
    """
    self.net.set_modules_train_mode(True)

    # Sample codes from the generator and decode them to sentences.
    sampled_code = self.net.gen.for_eval()
    dec_out = self.net.dec(sampled_code)

    sample_embedding = ResultWriter.Embedding(
        embed=sampled_code.data,
        text=dec_out.get_text_batch())

    entry = odict()
    entry['code'] = sample_embedding
    entry['text'] = dec_out.get_text()
    self.result.add(name, entry)

    # Evaluation of the generated sentences.
    eval_scores = evaluate_sents(self.test_sents, dec_out.get_text())
    self.result.add("Evaluation", eval_scores)
def _generate_text2(self, name="Generated"):
    """Decode raw noise vectors (bypassing the generator) and log/score them.

    Draws noise of shape (eval_size, hidden_size_w) directly in code
    space, decodes it with the decoder's tester, logs the embedding and
    text under `name`, then scores the sentences against
    `self.test_sents` under "Evaluation".
    """
    self.net.set_modules_train_mode(True)

    # Draw noise directly in code space and decode it.
    shape = (self.cfg.eval_size, self.cfg.hidden_size_w)
    z = self.net.dec.make_noise_size_of(shape)
    dec_out = self.net.dec.tester(z, max_len=self.cfg.max_len)

    noise_embedding = ResultWriter.Embedding(
        embed=z.data,
        text=dec_out.get_text_batch(),
        tag='code_embed')

    entry = odict()
    entry['embed'] = noise_embedding
    entry['txt_word'] = dec_out.get_text()
    self.result.add(name, entry)

    # Evaluation of the decoded noise sentences.
    eval_scores = evaluate_sents(self.test_sents, dec_out.get_text())
    self.result.add("Evaluation", eval_scores)
def _eval_autoencoder(self, batch, name='AE_eval'):
    """Evaluate the autoencoder: one clean decode plus `n_vars` noisy decodes.

    Encodes the batch once without variance, decodes it, then draws
    `n_vars` variance-perturbed codes (`reg.with_var`) and decodes each.
    Logs reconstruction loss/accuracy for the clean decode and one
    embedding per noisy decode.

    NOTE(review): indentation reconstructed from a collapsed source line;
    the `with torch.no_grad()` scope is assumed to cover the whole body.

    Args:
        batch: project batch object with `enc_src` / `dec_tar` fields.
        name: result-writer section name.
    """
    #name += ('/' + decode_mode)
    # Number of perturbed (noisy) decodings to produce per batch.
    n_vars = 10
    assert n_vars > 0
    code_list = list()
    decoded_list = list()
    self.net.set_modules_train_mode(False)
    with torch.no_grad():
        # Build graph: clean encode/decode path.
        embed = self.net.embed_w(batch.enc_src.id)
        #code = self.net.enc.with_noise(embed, batch.enc_src.len)
        enc_h = self.net.enc(embed, batch.enc_src.len)
        code = self.net.reg.without_var(enc_h)
        decoded = self.net.dec(code, max_len=self.cfg.max_len)
        #code = self.net.reg.without_var(enc_h)
        # Draw n_vars perturbed codes from the same encoder hidden state
        # and decode each. NOTE: `code_` and `decoded_` deliberately leak
        # out of the loop — the *last* perturbed sample is reused below.
        for _ in range(n_vars):
            #code_var = self.net.reg.with_var(code)
            # noise, _, _ = self.net.rev(code_)
            # code_r = self.net.gen(noise)
            #code_ = self._add_noise_to(code, 1.0)
            code_ = self.net.reg.with_var(enc_h)
            code_list.append(code_)
            decoded_ = self.net.dec(code_, max_len=max(batch.enc_src.len))
            decoded_list.append(decoded_)
        # noise, _, _ = self.net.rev(code)
        # code_gen = self.net.gen(noise)
        #code_var = self.net.reg.with_var(code)
        #cos_sim = F.cosine_similarity(code, code_var, dim=1).mean()
        assert len(code_list) > 0
        # Log the regularizer's current sigma statistics for inspection.
        log.info(self.net.reg.sigma.mean(1))
        log.info(self.net.reg.sigma[0])
        # Compute word prediction loss and accuracy on the clean decode,
        # truncated to the first `batch_size` sequences.
        bsz = self.cfg.batch_size
        maxlen = max(batch.enc_src.len)
        #tar = batch.enc_src.id[:bsz].veiw(bsz, )
        # Flattened target ids; assumes dec_tar.id is already flattened to
        # (bsz * maxlen,) for the RNN path — TODO confirm.
        target = batch.dec_tar.id[:bsz*maxlen]  # rnn
        #target = batch.enc_src.id[:bsz].view(-1)  # cnn
        loss_recon, acc = self._recon_loss_and_acc_for_rnn(
            decoded.prob[:bsz], target, len(self.net.vocab_w))
        #loss_var = 1 / torch.mean(self.net.reg.var)
        #loss_kl = self._compute_kl_div_loss(self.net.reg.mu, self.net.reg.sigma)
        # Embedding of the LAST perturbed code, paired with the clean text.
        embed = ResultWriter.Embedding(
            embed=code_.data,
            text=decoded.get_text_batch(),
            tag='code_embed')
        # embed_gen = ResultWriter.Embedding(
        #     embed=code_gen.data,
        #     text=decoded.get_text_batch(),
        #     tag='code_embed')
        # One embedding entry per perturbed sample: noise_0 .. noise_{n-1}.
        embeds_r = odict()
        for i in range(n_vars):
            embed_r = ResultWriter.Embedding(
                embed=code_list[i].data,
                text=decoded_list[i].get_text_batch(),
                tag='code_embed2')
            embeds_r.update({('noise_%d' % i): embed_r})
        result_dict = odict(
            loss_recon=loss_recon.item(),
            #loss_var=loss_var.item(),
            #loss_kl=loss_kl.item(),
            acc=acc.item(),
            real=embed,
            #embed_gen=embed_gen,
            #embed_recon=embed_r,
            # cosim=cos_sim.item(),
            noise=self.net.enc.noise_radius,
            text_real=decoded.get_text_with_pair(batch.enc_src.id),
            # text_noisy uses the LAST sample from the perturbation loop.
            text_noisy=decoded_.get_text_with_pair(batch.enc_src.id),
        )
        result_dict.update(embeds_r)
        self.result.add(name, result_dict)