def init_models(self):
    """Build the VAE (encoder + decoder), criteria, optimizer and HP schedules.

    Reads hyperparameters from `self.config`, moves modules to the available
    device via `cudable`, and restores a checkpoint if one exists.
    """
    emb_size = self.config['hp'].get('emb_size')
    hid_size = self.config['hp'].get('hid_size')

    # Zero weight for <pad> so padding positions do not contribute to the loss.
    weights = cudable(torch.ones(len(self.vocab)))
    weights[self.vocab.stoi['<pad>']] = 0
    # `size_average=True` is deprecated in PyTorch; `reduction='mean'` is the
    # exact equivalent.
    self.rec_criterion = nn.CrossEntropyLoss(weights, reduction='mean')
    self.kl_criterion = KLLoss()

    encoder = RNNEncoder(emb_size, hid_size, len(self.vocab))

    if self.config.get('decoder') == 'RNN':
        decoder = RNNDecoder(emb_size, hid_size, len(self.vocab))
    else:
        dilations = self.config.get('dilations')
        decoder = CNNDecoder(emb_size, hid_size, len(self.vocab), hid_size,
                             dilations=dilations)

    self.vae = cudable(VAE(encoder, decoder, hid_size))
    self.optimizer = self.construct_optimizer()

    # Scheduled hyperparameters; the triples are presumably
    # (start_value, end_value, num_iters) — confirm against HPLinearScheme.
    # Defaults keep each parameter constant.
    self.desired_kl_val = HPLinearScheme(*self.config.get('desired_kl_val', (0, 0, 1)))
    self.force_kl = HPLinearScheme(*self.config.get('force_kl', (1, 1, 1)))
    self.decoder_dropword_scheme = HPLinearScheme(*self.config.get('decoder_dropword_scheme', (0, 0, 1)))
    self.noiseness_scheme = HPLinearScheme(*self.config.get('noiseness_scheme', (1, 1, 1)))

    self.try_to_load_checkpoint()
def init_models(self):
    """Build the encoder/decoder autoencoder, reconstruction loss and optimizer."""
    emb_size = self.config['hp'].get('emb_size')
    hid_size = self.config['hp'].get('hid_size')

    # Zero weight for <pad> so padding positions do not contribute to the loss.
    weights = cudable(torch.ones(len(self.vocab)))
    weights[self.vocab.stoi['<pad>']] = 0
    # `size_average=True` is deprecated in PyTorch; `reduction='mean'` is the
    # exact equivalent.
    self.rec_criterion = nn.CrossEntropyLoss(weights, reduction='mean')

    self.encoder = cudable(RNNEncoder(emb_size, hid_size, len(self.vocab)))
    self.decoder = cudable(RNNDecoder(emb_size, hid_size, len(self.vocab)))

    # One optimizer over both modules' parameters.
    parameters = list(self.encoder.parameters()) + list(self.decoder.parameters())
    self.optimizer = Adam(parameters, lr=self.config.get('lr'))
def forward(self, x, p: float = None):
    """Word dropout: zero out whole token embeddings with probability `p`.

    Args:
        x: embeddings of shape (batch, len, emb_size).
        p: drop probability; falls back to `self.p` when None.

    Returns:
        x with random token positions zeroed in training mode; x unchanged
        in eval mode.
    """
    assert x.dim() == 3  # (batch, len, emb_size)

    # Bug fix: the old `p = p or self.p` treated an explicit p=0.0 as
    # "not given" and fell back to self.p. Compare against None instead.
    p = self.p if p is None else p

    # Eval mode is a no-op; skip building the mask entirely.
    if not self.training:
        return x

    # Keep each token with probability 1 - p; broadcast the per-token mask
    # across the embedding dimension.
    mask = torch.bernoulli(torch.Tensor(x.size(0), x.size(1)).fill_(1 - p))
    mask = cudable(mask).unsqueeze(-1).repeat(1, 1, x.size(2))

    return x * mask
def loss_on_batch(self, batch):
    """Reconstruction loss for one batch of the denoising autoencoder."""
    batch.text = cudable(batch.text)

    # Teacher forcing: decoder sees tokens [0..n-1], is trained to emit [1..n].
    inputs = batch.text[:, :-1]
    targets = batch.text[:, 1:]

    encodings = self.encoder(batch.text)
    noisy_encodings = encodings + self.compute_noise(encodings.size())
    recs = self.decoder(noisy_encodings, inputs)

    flat_logits = recs.view(-1, len(self.vocab))
    flat_targets = targets.contiguous().view(-1)

    return self.rec_criterion(flat_logits, flat_targets)
def inference(self, dataloader):
    """Produces predictions for a given dataloader."""
    noiseness = compute_param_by_scheme(self.noiseness_scheme, self.num_iters_done)

    seqs, originals = [], []
    for batch in dataloader:
        inputs = cudable(batch.text)
        seqs.extend(self.vae.inference(inputs, self.vocab, noiseness))
        originals.extend(inputs.detach().cpu().numpy().tolist())

    # Convert token ids back to strings for both predictions and sources.
    return itos_many(seqs, self.vocab), itos_many(originals, self.vocab)
def loss_on_batch(self, batch):
    """Reconstruction + KL losses for one VAE batch.

    Returns (rec_loss, kl_loss, (means, log_stds)).
    """
    step = self.num_iters_done
    noiseness = compute_param_by_scheme(self.noiseness_scheme, step)
    dropword_p = compute_param_by_scheme(self.decoder_dropword_scheme, step)

    batch.text = cudable(batch.text)
    (means, log_stds), predictions = self.vae(batch.text, noiseness, dropword_p)

    # Predictions are compared against the sequence shifted by one token.
    flat_targets = batch.text[:, 1:].contiguous().view(-1)
    rec_loss = self.rec_criterion(predictions.view(-1, len(self.vocab)), flat_targets)
    kl_loss = self.kl_criterion(means, log_stds.exp())

    return rec_loss, kl_loss, (means, log_stds)
def inference(self, dataloader):
    """Produces predictions for a given dataloader."""
    seqs = []
    originals = []

    for batch in dataloader:
        inputs = cudable(batch.text)
        latents = self.encoder(inputs)
        seqs.extend(inference(self.decoder, latents, self.vocab))
        originals.extend(inputs.detach().cpu().numpy().tolist())

    # Convert token ids back to strings for both predictions and sources.
    return itos_many(seqs, self.vocab), itos_many(originals, self.vocab)
def inference(model, z, vocab, max_len=100):
    """
    Greedily decode token sequences from latent codes `z`.

    All decoder models have the same inference procedure, so it lives in this
    common function.

    Args:
        model: decoder whose forward(z, tokens) returns logits over the vocab.
        z: one latent code per sequence; assumes batch is dim 0 — TODO confirm
            the remaining shape against the decoders.
        vocab: vocabulary providing stoi for '<bos>' / '<eos>'.
        max_len: hard cap on the number of generated tokens.

    Returns:
        A list (len == batch size) of token-id lists. Sequences that never
        emitted <eos> within max_len steps are returned as-is, truncated.
    """
    batch_size = z.size(0)
    BOS, EOS = vocab.stoi['<bos>'], vocab.stoi['<eos>']
    # Every sequence starts with <bos>.
    active_seqs = cudable(torch.tensor([[BOS] for _ in range(batch_size)]).long())
    # Maps rows of `active_seqs` back to positions in the original batch.
    active_seqs_idx = np.arange(batch_size)
    finished = [None for _ in range(batch_size)]
    n_finished = 0

    for _ in range(max_len):
        # Greedy decoding: argmax over the vocab at the last position only.
        next_tokens = model.forward(z, active_seqs).max(dim=-1)[1][:,-1] # TODO: use beam search
        active_seqs = torch.cat((active_seqs, next_tokens.unsqueeze(1)), dim=-1)
        # Which still-active rows just emitted <eos> (numpy mask over rows).
        finished_mask = (next_tokens == EOS).cpu().numpy().astype(bool)
        finished_seqs_idx = active_seqs_idx[finished_mask]
        active_seqs_idx = active_seqs_idx[finished_mask == 0]
        n_finished += finished_seqs_idx.size

        if finished_seqs_idx.size != 0:
            # TODO(universome)
            # finished[finished_seqs_idx] = active_seqs.masked_select(next_tokens == EOS).cpu().numpy()
            # Copy the newly finished rows into `finished` at their original
            # batch positions, then drop them from the active set (and drop
            # their latents so z stays aligned with active_seqs).
            for i, seq in zip(finished_seqs_idx, active_seqs[next_tokens == EOS]):
                finished[i] = seq.cpu().numpy().tolist()

            active_seqs = active_seqs[next_tokens != EOS]
            z = z[next_tokens != EOS]

        if n_finished == batch_size: break

    # Well, some sentences were not finished at the time.
    # Let's just fill them in.
    if n_finished != batch_size:
        # TODO(universome): finished[active_seqs_idx] = active_seqs
        for i, seq in zip(active_seqs_idx, active_seqs):
            finished[i] = seq.cpu().numpy().tolist()

    return finished
def shift_sequence(seq, n):
    """Prepends each sequence in a batch with n zero vectors.

    seq: (batch, len, vec_size) -> returns (batch, len + n, vec_size).
    """
    zeros = cudable(torch.zeros(seq.size(0), n, seq.size(-1)))
    return torch.cat((zeros, seq), dim=1)
def compute_noise(self, size):
    """Standard-normal noise of the given size, scaled by the scheduled noiseness."""
    scale = compute_param_by_scheme(self.noiseness_scheme, self.num_iters_done)
    gaussian = np.random.normal(size=size)
    noise = cudable(torch.from_numpy(gaussian).float())
    return noise * scale
def sample(means, stds):
    """Reparameterization trick: draw latents ~ N(means, stds^2)."""
    eps = np.random.normal(size=stds.size())
    eps = cudable(torch.from_numpy(eps).float())
    return means + stds * eps