Ejemplo n.º 1
0
def main(args):
    """Train the model selected by FLAGS and print per-epoch diagnostics.

    Args:
        args: positional CLI arguments (unused; kept for the launcher's
            calling convention).
    """
    dataset = get_dataset()
    model = get_model(dataset).to(_device())
    loader = torch_data.DataLoader(
        dataset,
        batch_size=FLAGS.n_batch,
        collate_fn=dataset.collate,
        shuffle=True,
    )

    opt = optim.Adam(model.parameters(), lr=0.001)
    for i_epoch in range(FLAGS.n_epochs):
        epoch_loss = 0.0
        n_batches = 0
        for target, exemplars in loader:
            target = torch.tensor(target, device=_device())
            exemplars = torch.tensor(exemplars, device=_device())
            loss = model(target, exemplars, epoch=i_epoch)
            opt.zero_grad()
            loss.backward()
            opt.step()
            epoch_loss += loss.item()
            n_batches += 1

        # Mirror of the model's sigmoid annealing schedule, printed so the
        # current annealing weight can be tracked alongside the loss.
        print(i_epoch, 1 / (1 + np.exp(-(i_epoch / 5 - 10))))
        # Guard against an empty loader; the original divided by
        # (i_batch + 1), which raised NameError when no batch was seen.
        print(epoch_loss / max(n_batches, 1))
        print()
        for extrapolate in (True, False):
            eval_seqs, eval_batch = dataset.eval_batch(100, extrapolate)
            samples = model.sample(torch.tensor(eval_batch, device=_device()),
                                   100)
            print(f"extrapolate={extrapolate}")
            evaluation.visualize(eval_seqs[:5], samples[:5], dataset)
            print(evaluation.compute_coverage(samples, dataset))
            print()
        print()
Ejemplo n.º 2
0
    def forward(self, target, exemplars, epoch, **kwargs):
        """Analogy-regularized sequence loss.

        Encodes three exemplars per target and predicts the target's encoding
        as the vector analogy exemplar[1] + exemplar[2] - exemplar[0]. Each
        batch element's decoder is then conditioned on either the analogy
        prediction or the true target encoding, chosen at random with
        probability `recon_scale` (annealed upward with `epoch`).

        Args:
            target: int token tensor of shape (n_seq, n_batch).
            exemplars: int token tensor of shape (n_ex_seq, n_batch, 3).
            epoch: current training epoch; drives the annealing schedule.
            **kwargs: ignored.

        Returns:
            Scalar loss: annealed representation loss + reconstruction loss.
        """
        # Single recurrent layer assumed, so the encoder state's leading
        # (layer) dimension is 1 and can be treated as a plain encoding.
        assert FLAGS.n_layers == 1
        n_seq, n_batch = target.shape
        n_ex_seq, _, n_ex = exemplars.shape
        # The analogy below hard-codes indices 0, 1, 2 — exactly 3 exemplars.
        assert n_ex == 3

        # Fold the exemplar axis into the batch so one encoder pass handles
        # all exemplars at once.
        exemplars = exemplars.view(n_ex_seq, n_batch * n_ex)
        ex_embedding = self.embed(exemplars)
        _, (ex_encoding, _) = self.enc(ex_embedding)
        ex_encoding = ex_encoding.view(n_batch, n_ex, FLAGS.n_hidden)
        # Vector analogy: predicted target encoding = ex1 + ex2 - ex0.
        pred_tgt = ex_encoding[:, 1, :] + ex_encoding[:,
                                                      2, :] - ex_encoding[:,
                                                                          0, :]
        pred_tgt = pred_tgt.unsqueeze(0)

        # Sigmoid schedule in (0, 0.5): crosses 0.25 at epoch 50 (= 5 * 10).
        recon_scale = 0.5 * 1 / (1 + np.exp(-(epoch / 5 - 10)))

        tgt_embedding = self.embed(target)
        _, (tgt_encoding, _) = self.enc(tgt_embedding)

        # Per-batch-element Bernoulli(recon_scale) mask: 1 -> condition the
        # decoder on the analogy prediction, 0 -> on the true encoding.
        mask = (torch.rand(1, n_batch, 1, device=_device()) <
                recon_scale).expand_as(pred_tgt).float()
        rep = mask * pred_tgt + (1 - mask) * tgt_encoding

        # Teacher-forced decoding: inputs are target[:-1], labels target[1:].
        hid = (rep, torch.zeros_like(rep))
        tgt_decoding, _ = self.dec(tgt_embedding[:-1, :, :], hid)
        tgt_pred = self.pred(tgt_decoding).view((n_seq - 1) * n_batch,
                                                self.n_tokens)

        return (
            recon_scale * self.representation_loss(pred_tgt, tgt_encoding) +
            self.reconstruction_loss(tgt_pred, target[1:, :].view(
                (n_seq - 1) * n_batch)))
Ejemplo n.º 3
0
    def forward(self, target, exemplars, epoch, **kwargs):
        """VAE-style loss: annealed prior penalty + reconstruction NLL.

        The exemplars are unused by this variant; the target sequence is
        encoded, a latent is drawn around the encoder mean, and the decoder
        is trained with teacher forcing.
        """
        del exemplars
        seq_len, batch = target.shape
        dec_inputs = target[:-1, :]
        flat_labels = target[1:, :].view((seq_len - 1) * batch)

        # Encode the full target sequence to a (mean, log_std) posterior.
        _, (enc_state, _) = self.rnn(self.embed(target))
        mean = self.mean(enc_state)
        log_std = self.log_std(enc_state)
        # NOTE: std is currently unused — the full Gaussian KL and the
        # std-scaled reparameterization are disabled in this variant.
        std = torch.exp(log_std)

        # Simplified prior penalty: only the mean term of the Gaussian KL.
        prior_kl = ((mean**2) / 2).mean()

        # Unit-variance noise added directly to the mean (std deliberately
        # not applied); cell state starts at zero.
        noise = torch.normal(mean=0, std=1, size=mean.shape, device=_device())
        init_state = (mean + noise, torch.zeros_like(mean))

        # Teacher-forced decode and per-token negative log-likelihood.
        decoded, _ = self.rnn(self.embed(dec_inputs), init_state)
        logits = self.predict(decoded)
        logits = logits.view((seq_len - 1) * batch, self.n_tokens)
        pred_nlprob = self.loss(logits, flat_labels)

        # Sigmoid annealing of the prior term, centered at epoch 5.
        kl_weight = 10 * 1 / (1 + np.exp(-(epoch - 5)))

        return kl_weight * prior_kl + pred_nlprob
Ejemplo n.º 4
0
 def sample(self, exemplars, count):
     """Sample `count` sequences starting from an all-zero recurrent state.

     The exemplars argument is accepted for interface compatibility but is
     not used by this model's sampler.
     """
     hidden = torch.zeros(1, count, FLAGS.n_hidden).to(_device())
     cell = torch.zeros(1, count, FLAGS.n_hidden).to(_device())
     return _sample(self.embed,
                    self.rnn,
                    self.predict,
                    [hidden, cell],
                    init_token=1,
                    stop_token=10,
                    count=count)
Ejemplo n.º 5
0
def _sample(embed,
            rnn,
            predict,
            init_state,
            init_token,
            stop_token,
            count=1,
            max_len=40,
            greedy=False):
    """Autoregressively decode `count` token sequences from `rnn`.

    Starting from `init_state` and `init_token`, repeatedly feeds the last
    token back in and picks the next one either greedily (argmax) or by
    sampling from the softmax distribution. Afterwards each sequence is
    truncated just past its first `stop_token` (if any) and stripped of
    zero (padding) tokens.

    Returns:
        A list of `count` cleaned token lists, each beginning with
        `init_token`.
    """
    assert init_state[0].shape[1] == count
    with torch.no_grad():
        sequences = [[init_token] for _ in range(count)]
        state = init_state
        tokens = init_token * torch.ones(
            (1, count), dtype=torch.int64, device=_device())
        for _ in range(max_len):
            hidden, state = rnn(embed(tokens), state)
            probs = F.softmax(predict(hidden), dim=2).detach().cpu().numpy()
            chosen = []
            for b in range(count):
                if greedy:
                    tok = np.argmax(probs[0, b, :])
                else:
                    # Sample a token index from the predicted distribution.
                    tok = np.random.choice(probs.shape[2], p=probs[0, b, :])
                sequences[b].append(tok)
                chosen.append(tok)
            tokens = torch.tensor([chosen],
                                  dtype=torch.int64,
                                  device=_device())

    cleaned = []
    for seq in sequences:
        if stop_token in seq:
            # Keep everything up to and including the first stop token.
            seq = seq[:seq.index(stop_token) + 1]
        cleaned.append([t for t in seq if t != 0])

    return cleaned
Ejemplo n.º 6
0
 def sample(self, exemplars, count):
     """Sample `count` sequences from a standard-normal prior (DISABLED).

     NOTE(review): `assert False` makes this method fail immediately —
     presumably sampling is intentionally unsupported for this model right
     now. Everything below the assert is dead code; it would draw the
     initial hidden state from N(0, I) and decode with `_sample`.
     """
     assert False
     enc = torch.normal(mean=0,
                        std=1,
                        size=(1, count, FLAGS.n_hidden),
                        device=_device())
     init_state = (enc, torch.zeros_like(enc))
     return _sample(self.embed,
                    self.dec_rnn,
                    self.predict,
                    init_state,
                    init_token=1,
                    stop_token=10,
                    count=count)
Ejemplo n.º 7
0
    def forward(self, target, exemplars, **kwargs):
        """VAE loss with an exemplar-attention posterior.

        Encodes the exemplars and the target, attends from the target
        encoding over the exemplar encodings to form a context vector,
        parameterizes a Gaussian latent from that context, and trains the
        decoder with teacher forcing. Returns KL(prior) + reconstruction
        negative log-likelihood.
        """
        seq_len, batch = target.shape
        ex_len, _, ex_count = exemplars.shape
        dec_inputs = target[:-1, :]
        flat_labels = target[1:, :].view((seq_len - 1) * batch)

        # Encode every exemplar in one pass by folding the exemplar axis
        # into the batch, then restore it: (batch, ex_count, hidden).
        flat_ex = exemplars.view(ex_len, batch * ex_count)
        _, (ex_states, _) = self.enc_rnn(self.embed(flat_ex))
        ex_states = ex_states.view(batch, ex_count, FLAGS.n_hidden)

        # Encode the target sequence with the same encoder.
        _, (tgt_state, _) = self.enc_rnn(self.embed(target))

        # Dot-product attention: target encoding scores each exemplar.
        tgt_expanded = tgt_state.squeeze(0).unsqueeze(1).expand_as(ex_states)
        scores = (tgt_expanded * ex_states).sum(dim=2, keepdim=True)
        weights = F.softmax(scores, dim=1)
        context = (ex_states *
                   weights.expand_as(ex_states)).sum(dim=1).unsqueeze(0)

        # Gaussian posterior parameters from the attended context.
        mean = self.mean(context)
        log_std = self.log_std(context)
        std = torch.exp(log_std)

        # Closed-form KL divergence against a standard normal prior.
        prior_kl = ((mean**2 + std**2 - 2 * log_std - 1) / 2).mean()

        # Reparameterized latent as the decoder's initial hidden state;
        # cell state starts at zero.
        noise = torch.normal(mean=0, std=1, size=mean.shape, device=_device())
        init_state = (mean + std * noise, torch.zeros_like(mean))

        # Teacher-forced decode and per-token NLL.
        decoded, _ = self.dec_rnn(self.embed(dec_inputs), init_state)
        logits = self.predict(decoded)
        logits = logits.view((seq_len - 1) * batch, self.n_tokens)
        pred_nlprob = self.loss(logits, flat_labels)

        return prior_kl + pred_nlprob