Ejemplo n.º 1
0
    def init_models(self):
        """Build the VAE (encoder + decoder), criteria, optimizer and HP schemes.

        Hyperparameters are read from ``self.config``; a checkpoint is
        restored at the end if one is available.
        """
        emb_size = self.config['hp'].get('emb_size')
        hid_size = self.config['hp'].get('hid_size')

        # Zero weight for <pad> so padding tokens don't contribute to the loss.
        weights = cudable(torch.ones(len(self.vocab)))
        weights[self.vocab.stoi['<pad>']] = 0
        # `size_average=True` is deprecated (removed in recent PyTorch);
        # reduction='mean' is the exact equivalent.
        self.rec_criterion = nn.CrossEntropyLoss(weights, reduction='mean')
        self.kl_criterion = KLLoss()

        encoder = RNNEncoder(emb_size, hid_size, len(self.vocab))

        if self.config.get('decoder') == 'RNN':
            decoder = RNNDecoder(emb_size, hid_size, len(self.vocab))
        else:
            dilations = self.config.get('dilations')
            decoder = CNNDecoder(emb_size,
                                 hid_size,
                                 len(self.vocab),
                                 hid_size,
                                 dilations=dilations)

        self.vae = cudable(VAE(encoder, decoder, hid_size))
        self.optimizer = self.construct_optimizer()

        # Each scheme tuple presumably encodes a linear annealing schedule
        # (start, end, num_iters) — TODO confirm against HPLinearScheme.
        self.desired_kl_val = HPLinearScheme(
            *self.config.get('desired_kl_val', (0, 0, 1)))
        self.force_kl = HPLinearScheme(*self.config.get('force_kl', (1, 1, 1)))
        self.decoder_dropword_scheme = HPLinearScheme(
            *self.config.get('decoder_dropword_scheme', (0, 0, 1)))
        self.noiseness_scheme = HPLinearScheme(
            *self.config.get('noiseness_scheme', (1, 1, 1)))

        self.try_to_load_checkpoint()
Ejemplo n.º 2
0
    def init_models(self):
        """Build the encoder/decoder pair, reconstruction loss and optimizer.

        Hyperparameters are read from ``self.config``.
        """
        emb_size = self.config['hp'].get('emb_size')
        hid_size = self.config['hp'].get('hid_size')

        # Zero weight for <pad> so padding tokens don't contribute to the loss.
        weights = cudable(torch.ones(len(self.vocab)))
        weights[self.vocab.stoi['<pad>']] = 0

        # `size_average=True` is deprecated (removed in recent PyTorch);
        # reduction='mean' is the exact equivalent.
        self.rec_criterion = nn.CrossEntropyLoss(weights, reduction='mean')
        self.encoder = cudable(RNNEncoder(emb_size, hid_size, len(self.vocab)))
        self.decoder = cudable(RNNDecoder(emb_size, hid_size, len(self.vocab)))

        # Single optimizer over both modules' parameters.
        parameters = list(self.encoder.parameters()) + list(
            self.decoder.parameters())
        self.optimizer = Adam(parameters, lr=self.config.get('lr'))
Ejemplo n.º 3
0
    def forward(self, x, p: float = None):
        """Word dropout: zero out whole timesteps of x at random while training.

        Args:
            x: tensor of shape (batch, len, emb_size).
            p: drop probability; falls back to ``self.p`` when None.

        Returns:
            x with random positions zeroed in training mode; x unchanged in eval.
        """
        assert x.dim() == 3  # (batch, len, emb_size)

        # `p or self.p` (original) wrongly discarded an explicit p=0.0;
        # compare against None instead.
        if p is None:
            p = self.p

        # In eval mode the mask was computed and thrown away — skip it entirely.
        if not self.training:
            return x

        # Per-(batch, position) keep mask, broadcast over the embedding dim.
        mask = torch.bernoulli(torch.Tensor(x.size(0), x.size(1)).fill_(1 - p))
        mask = cudable(mask).unsqueeze(-1).repeat(1, 1, x.size(2))

        return x * mask
Ejemplo n.º 4
0
    def loss_on_batch(self, batch):
        """Reconstruction loss for one batch (noisy autoencoder step)."""
        batch.text = cudable(batch.text)

        # Teacher forcing: decoder sees tokens 0..n-1 and predicts 1..n.
        decoder_inputs = batch.text[:, :-1]
        targets = batch.text[:, 1:]

        latents = self.encoder(batch.text)
        noise = self.compute_noise(latents.size())
        logits = self.decoder(latents + noise, decoder_inputs)

        flat_logits = logits.view(-1, len(self.vocab))
        flat_targets = targets.contiguous().view(-1)

        return self.rec_criterion(flat_logits, flat_targets)
Ejemplo n.º 5
0
    def inference(self, dataloader):
        """
        Produces predictions for a given dataloader
        """
        # Current noise level according to the annealing scheme.
        noiseness = compute_param_by_scheme(self.noiseness_scheme,
                                            self.num_iters_done)

        generated, sources = [], []

        for batch in dataloader:
            text = cudable(batch.text)
            generated.extend(self.vae.inference(text, self.vocab, noiseness))
            sources.extend(text.detach().cpu().numpy().tolist())

        # Convert token ids back to strings for both outputs.
        return itos_many(generated, self.vocab), itos_many(sources, self.vocab)
Ejemplo n.º 6
0
    def loss_on_batch(self, batch):
        """Reconstruction and KL losses for one batch of the VAE."""
        # Annealed hyperparameters for the current iteration.
        iters = self.num_iters_done
        noiseness = compute_param_by_scheme(self.noiseness_scheme, iters)
        dropword_p = compute_param_by_scheme(self.decoder_dropword_scheme, iters)

        batch.text = cudable(batch.text)
        (means, log_stds), predictions = self.vae(batch.text, noiseness,
                                                  dropword_p)

        # Targets are the input shifted by one token (teacher forcing).
        flat_preds = predictions.view(-1, len(self.vocab))
        flat_targets = batch.text[:, 1:].contiguous().view(-1)

        rec_loss = self.rec_criterion(flat_preds, flat_targets)
        kl_loss = self.kl_criterion(means, log_stds.exp())

        return rec_loss, kl_loss, (means, log_stds)
Ejemplo n.º 7
0
    def inference(self, dataloader):
        """
        Produces predictions for a given dataloader
        """
        generated, sources = [], []

        for batch in dataloader:
            text = cudable(batch.text)
            # Encode, then decode greedily via the shared inference helper.
            latents = self.encoder(text)
            decoded = inference(self.decoder, latents, self.vocab)

            generated.extend(decoded)
            sources.extend(text.detach().cpu().numpy().tolist())

        # Convert token ids back to strings for both outputs.
        return itos_many(generated, self.vocab), itos_many(sources, self.vocab)
Ejemplo n.º 8
0
def inference(model, z, vocab, max_len=100):
    """
    All decoder models have the same inference procedure
    Let's move it into the common function

    Greedily decodes token sequences from latent codes `z`, one batch at a
    time, removing sequences from the active set as soon as they emit <eos>.

    Args:
        model: decoder whose forward(z, tokens) returns per-token logits.
        z: batch of latent codes; z.size(0) is the batch size.
        vocab: vocabulary providing stoi['<bos>'] / stoi['<eos>'].
        max_len: maximum number of decoding steps.

    Returns:
        List of length batch_size; each element is a list of token ids
        (starting with <bos>), in the original batch order.
    """
    batch_size = z.size(0)
    BOS, EOS = vocab.stoi['<bos>'], vocab.stoi['<eos>']
    # Every sequence starts with a single <bos> token.
    active_seqs = cudable(torch.tensor([[BOS] for _ in range(batch_size)]).long())
    # Maps rows of `active_seqs` back to their original batch positions.
    active_seqs_idx = np.arange(batch_size)
    finished = [None for _ in range(batch_size)]
    n_finished = 0

    for _ in range(max_len):
        # Greedy argmax over the logits of the last position only.
        next_tokens = model.forward(z, active_seqs).max(dim=-1)[1][:,-1] # TODO: use beam search
        active_seqs = torch.cat((active_seqs, next_tokens.unsqueeze(1)), dim=-1)
        finished_mask = (next_tokens == EOS).cpu().numpy().astype(bool)
        finished_seqs_idx = active_seqs_idx[finished_mask]
        # Keep only the original positions of still-active sequences.
        active_seqs_idx = active_seqs_idx[finished_mask == 0]
        n_finished += finished_seqs_idx.size

        if finished_seqs_idx.size != 0:
            # TODO(universome)
            # finished[finished_seqs_idx] = active_seqs.masked_select(next_tokens == EOS).cpu().numpy()
            # Store each just-finished sequence at its original batch position.
            for i, seq in zip(finished_seqs_idx, active_seqs[next_tokens == EOS]):
                finished[i] = seq.cpu().numpy().tolist()

            # Drop the finished rows from the active tensors (z stays aligned).
            active_seqs = active_seqs[next_tokens != EOS]
            z = z[next_tokens != EOS]

        if n_finished == batch_size: break

    # Well, some sentences were finished at the time
    # Let's just fill them in
    if n_finished != batch_size:
        # TODO(universome): finished[active_seqs_idx] = active_seqs
        # Sequences that never emitted <eos> are returned truncated at max_len.
        for i, seq in zip(active_seqs_idx, active_seqs):
            finished[i] = seq.cpu().numpy().tolist()

    return finished
Ejemplo n.º 9
0
def shift_sequence(seq, n):
    """Prepends each sequence in a batch with n zero vectors"""
    # Zero padding of shape (batch, n, vec_size), placed on the same device.
    zeros = cudable(torch.zeros(seq.size(0), n, seq.size(-1)))

    return torch.cat((zeros, seq), dim=1)
Ejemplo n.º 10
0
    def compute_noise(self, size):
        """Standard-normal noise of the given size, scaled by the current
        noiseness level from the annealing scheme."""
        scale = compute_param_by_scheme(self.noiseness_scheme,
                                        self.num_iters_done)
        noise = cudable(torch.from_numpy(np.random.normal(size=size)).float())

        return noise * scale
Ejemplo n.º 11
0
def sample(means, stds):
    """Reparameterization trick: draw latents from N(means, stds^2)
    via means + stds * eps with eps ~ N(0, 1)."""
    eps = np.random.normal(size=stds.size())
    eps = cudable(torch.from_numpy(eps).float())

    return means + stds * eps