Beispiel #1
0
            # Per-batch body of a train/eval loop (the loop header and the
            # definitions of model, totals, bsize, step and split are outside this
            # excerpt). Move the batch tensors onto the target device first.
            enc_inputs = enc_inputs.to(device)
            dec_inputs = dec_inputs.to(device)
            targets = targets.to(device)
            lengths = lengths.to(device)

            # forward: the model returns five values, unpacked as the output
            # log-probabilities, two latent tensors (z_0, z_T) and the posterior
            # parameters (mu, logvar).
            # NOTE(review): z_0 / z_T are presumably the latent sample before and
            # after some transformation inside the model -- confirm.
            logp, z_0, z_T, mu, logvar = model(enc_inputs, dec_inputs, lengths)
            

            # calculate loss
            # NLL is evaluated with lengths + 1; presumably the extra position
            # covers an end-of-sequence token in targets -- TODO confirm.
            NLL_loss = NLL(logp, targets, lengths + 1)
            # KL loss: sum over the batch of (log_q_z - log_p_z), where log_p_z is
            # computed from z_T and log_q_z from z_0 with (mu, logvar).
            # NOTE(review): presumably a sample-based estimate of the KL term;
            # verify against the log_Normal_* helpers.
            log_p_z = log_Normal_standard(z_T, dim=1)
            log_q_z = log_Normal_diag(z_0, mu, logvar, dim=1)
            KL_loss = torch.sum(-(log_p_z - log_q_z))
            # KL weight comes from linear_anneal over 10x the number of training
            # batches (presumably a ramp over the first 10 epochs -- confirm).
            KL_weight = linear_anneal(step, len(dataloaders['train']) * 10)
            # Normalise the weighted objective by bsize before backpropagation.
            loss = (NLL_loss + KL_weight * KL_loss) / bsize         
            
            
            # cumulate
            # loss.item() * bsize undoes the division above, so 'ELBO' accumulates
            # NLL_loss + KL_weight * KL_loss (i.e. with the annealed KL weight),
            # while 'NLL' and 'KL' accumulate the unweighted terms.
            totals['ELBO'] += loss.item() * bsize
            totals['NLL'] += NLL_loss.item()
            totals['KL'] += KL_loss.item()
            # NOTE(review): the word count uses lengths, whereas NLL above uses
            # lengths + 1 -- confirm this is the intended normaliser.
            totals['words'] += torch.sum(lengths).item()

            # backward and optimize (training split only)
            if split == 'train':
                step += 1
                optimizer.zero_grad()
                loss.backward()
                # Clip the global gradient norm to 5.
                # NOTE(review): no optimizer.step() is visible after this line in
                # this excerpt -- the fragment appears truncated; confirm.
                nn.utils.clip_grad_norm_(model.parameters(), 5)
            # NOTE(review): from here the model's return signature differs from
            # the forward call earlier in the file, so this looks like the start
            # of a second, separately excerpted loop body -- confirm.
            targets = targets.to(device)
            lengths = lengths.to(device)

            # forward: the model returns five values, unpacked as the output
            # log-probabilities, posterior parameters (mu, logvar) and two extra
            # loss terms (kld, aux_loss) computed inside the model.
            logp, mu, logvar, kld, aux_loss = model(enc_inputs, dec_inputs,
                                                    lengths)

            # calculate loss
            # NLL is evaluated with lengths + 1; presumably the extra position
            # covers an end-of-sequence token in targets -- TODO confirm.
            NLL_loss = NLL(logp, targets, lengths + 1)
            KL_loss = KL_div(mu, logvar)
            # Schedule for the KL weight and the aux-loss weight (beta):
            # while ep < 5 the KL weight is fixed at 0.1 and aux_loss is disabled;
            # afterwards the KL weight is annealed starting from 0.1, with the
            # step counter offset by 5x the number of training batches, and beta
            # is set to 0.5.
            # NOTE(review): len(dataloaders['train']) is presumably the number of
            # batches per epoch, making these offsets whole epochs -- confirm.
            if ep < 5:
                KL_weight = 0.1
                beta = 0.
            else:
                KL_weight = linear_anneal(step - len(dataloaders['train']) * 5,
                                          len(dataloaders['train']) * 5,
                                          initial=0.1)
                beta = 0.5
            # Schedule for the weight on kld: fixed at 0.05 while ep < 1, then
            # annealed from 0.05 over 4x the number of training batches, with the
            # step counter offset by 1x that number.
            if ep < 1:
                RNN_weight = 0.05
            else:
                RNN_weight = linear_anneal(step -
                                           len(dataloaders['train']) * 1,
                                           len(dataloaders['train']) * 4,
                                           initial=0.05)
            # Weighted sum of all four terms, normalised by bsize.
            loss = (NLL_loss + KL_weight * KL_loss + RNN_weight * kld +
                    beta * aux_loss) / bsize

            # cumulate
            # loss.item() * bsize undoes the division above, so 'ELBO' accumulates
            # the full weighted objective (including the kld and aux_loss terms),
            # while 'NLL' accumulates the unweighted reconstruction term.
            totals['ELBO'] += loss.item() * bsize
            totals['NLL'] += NLL_loss.item()