def dis_loss(self, z):
    """Return the discriminator loss for a batch of latent codes.

    Samples obtained from ``self.prior`` are scored by ``self.discriminate``
    against a target of 1, and the supplied ``z`` against a target of 0.
    The two ``bce`` terms are added, averaged and halved.

    Args:
        z: batch of latent codes; ``z.size(0)`` is the number of samples
            requested from ``self.prior``.

    Returns:
        ``0.5 * mean(bce(p_real, 1) + bce(p_fake, 0))``.
    """
    z_real = self.prior(z.size(0)).type_as(z)
    p_real = self.discriminate(z_real)

    # Detach so that gradients only flow through the discriminator and not
    # back into whatever produced ``z``.
    z_fake = z.detach()
    p_fake = self.discriminate(z_fake)

    # ``*_like`` builds the targets directly with the dtype/device of the
    # predictions instead of allocating on the CPU and converting.
    ones = torch.ones_like(p_real)
    zeros = torch.zeros_like(p_fake)

    return 0.5 * torch.mean(bce(p_real, ones) + bce(p_fake, zeros))
def rec_loss(self, rec_x, x, loss='BCE'):
    """Return the reconstruction loss between ``rec_x`` and ``x``.

    Args:
        rec_x: reconstructed batch.
        x: target batch.
        loss: ``'BCE'`` to use ``bce`` or ``'MSE'`` to use ``F.mse_loss``.

    Returns:
        The loss averaged over every element of the batch.

    Raises:
        ValueError: if ``loss`` is neither ``'BCE'`` nor ``'MSE'``.
    """
    # The deprecated ``size_average=True`` flag is omitted: averaging is the
    # default reduction, and the outer ``torch.mean`` guarantees a mean over
    # all elements either way.
    if loss == 'BCE':
        return torch.mean(bce(rec_x, x))
    if loss == 'MSE':
        return torch.mean(F.mse_loss(rec_x, x))
    # Fail loudly instead of printing and implicitly returning None, which
    # would only surface later as an unrelated error in the training loop.
    raise ValueError('unknown loss:' + str(loss))
def test_compute_losses(self):
    """Check ``grid.compute_losses`` against hand-computed expectations.

    One box/label pair is encoded as the ground truth and a different pair
    as the network output; the returned box and class losses are compared
    with values computed directly from ``l1_loss`` and ``bce``.
    """
    # Encoded ground truth: a single box with label 1.
    gt_boxes = torch.tensor([[[-0.75, 0, -0.25, 1]]])
    gt_labels = torch.tensor([[1]])
    gt = self.grid.encode(gt_boxes, gt_labels)

    # Encoded "prediction": a single, different box with label 0.
    out_boxes = torch.tensor([[[-1., 0, 0, 1]]])
    out_labels = torch.tensor([[0]])
    out = self.grid.encode(out_boxes, out_labels)

    box_loss, class_loss = self.grid.compute_losses(out, gt)
    box_loss = box_loss.item()
    class_loss = class_loss.item()

    expected_box_loss = l1_loss(
        torch.tensor([0, 0, 1, 0.5]),
        torch.tensor([0, 0, 2, 0.5]),
    ).item()
    self.assertEqual(box_loss, expected_box_loss)

    # Two mismatching class entries, averaged over 16 class elements.
    num_class_els = 16
    mismatch = 2 * bce(torch.tensor(1.), torch.tensor(0.))
    expected_class_loss = mismatch.item() / num_class_els
    self.assertEqual(class_loss, expected_class_loss)
def compute_losses(self, out, gt):
    """Compute losses given network output and encoded ground truth.

    The class loss is ``bce`` over every class probability. The box loss is
    ``l1_loss`` over the anchor parameters of the cells whose ground-truth
    class probabilities are not all zero. If no cell contains an object the
    box loss is zero rather than the NaN a mean over an empty selection
    would give.

    Args:
        out: tensor (b, ad, g, g)
        gt: tensor (b, ad, g, g)

    Returns:
        box_loss, class_loss (both tensor (1))
    """
    batch_sz = out.shape[0]

    # (b, a, d, gg)
    out = out.reshape((batch_sz, self.num_ancs, self.det_sz, -1))
    gt = gt.reshape((batch_sz, self.num_ancs, self.det_sz, -1))

    # (b, a, 4, gg)
    out_anc_params = out[:, :, :4, :]
    gt_anc_params = gt[:, :, :4, :]

    # (b, a, c, gg)
    out_probs = out[:, :, 4:, :]
    gt_probs = gt[:, :, 4:, :]

    # TODO: switch to use logits version?
    class_loss = bce(out_probs, gt_probs)

    # [b, a, 1, gg]
    has_object = gt_probs.sum(2, keepdim=True) != 0

    # (-1)
    out_anc_params = out_anc_params.masked_select(has_object)
    gt_anc_params = gt_anc_params.masked_select(has_object)

    if out_anc_params.numel() == 0:
        # Nothing to regress: the sum of an empty selection is exactly zero
        # and stays attached to the autograd graph of ``out``.
        box_loss = out_anc_params.sum()
    else:
        box_loss = l1_loss(out_anc_params, gt_anc_params)

    return box_loss, class_loss
logVar=outLogVar) loss = bceLoss + opts.alpha * klLoss loss.backward(retain_graph=True) #fill in grads optimizer_VAE.step() #DO the optimization step later - cause using a reconstruction loss to do a step too ####### CLASSER ####### #get ouput, clac loss, calc all grads, optimise ## - 3 components to the classification loss # #1 classification loss on the training data smaples # #2 classifcation loss on the reconstructed data samples # #3 classification loss on the flipped samples - DO NOT USE TO UPDATE CLASSIFIER - USED TO UPDATE DELTA Z! optimizer_CLASSER.zero_grad() predY = classer.forward(x) classLoss = bce(predY.type_as(y), y) classLoss.backward(retain_graph=True) optimizer_CLASSER.step() optimizer_VAE.zero_grad() if opts.jointClassLoss: predYrec = classer.forward( outRec) #Do not update classer with this loss! classLossRec = opts.phi * bce(predYrec.type_as(y), y) classLossRec.backward( retain_graph=True ) #will be updating the encoder and decoder!!! can detach else where to NOT do this! optimizer_VAE.step() ####### DELTA Z ####### optimizer_DZ.zero_grad()
classLoss = class_loss_fn(pred=predY, target=y) vaeLoss += opts.rho * classLoss #DIS loss pXreal = dis(x) pXfakeRec = dis(outRec.detach()) zRand = sample_z(x.size(0), opts.nz, cvae.useCUDA) yRand = Variable( torch.eye(2)[torch.LongTensor( y.data.cpu().numpy())]).type_as(zRand) pXfakeRand = dis(cvae.decode(yRand, zRand).detach()) fakeLabel = Variable(torch.Tensor( pXreal.size()).zero_()).type_as(pXreal) realLabel = Variable(torch.Tensor( pXreal.size()).fill_(1)).type_as(pXreal) disLoss = 0.3 * (bce(pXreal, realLabel, size_average=False) + \ bce(pXfakeRec, fakeLabel, size_average=False) + \ bce(pXfakeRand, fakeLabel, size_average=False)) / pXreal.size(1) #GEN loss pXfakeRec = dis(outRec) pXfakeRand = dis(cvae.decode(yRand, zRand)) genLoss = 0.5 * (bce(pXfakeRec, realLabel,size_average=False) +\ bce(pXfakeRand, realLabel, size_average=False)) / pXfakeRec.size(1) #include the GENloss (the encoder loss) with the VAE loss vaeLoss += opts.delta * genLoss #zero the grads - otherwise they will be acculated #fill in grads and do updates: optimizerCVAE.zero_grad()
if mixup == 0: return d1, l1 d2, l2 = one_batch() alpha = Variable(torch.randn(d1.size(0), 1).uniform_(0, mixup)) if use_cuda: alpha = alpha.cuda() d = alpha * d1 + (1. - alpha) * d2 l = alpha * l1 + (1. - alpha) * l2 return d, l for iteration in tqdm(range(n_iterations)): for extra in range(extraD): data, labels = mixup_batch(mixup) optD.zero_grad() lossD = bce(netD(data), labels) lossD.backward() optD.step() data, labels = mixup_batch(0) optG.zero_grad() lossG = -bce(netD(data), labels) lossG.backward() optG.step() if iteration in [10, 100, 1000, 10000, 20000]: plot_real = p_real.cpu().data.numpy() plot_fake = netG(p_nois).cpu().data.numpy() torch.save((plot_real, plot_fake), 'samples/example_z=%d_%s_%1.1f_%06d.pt' %
def gen_loss(self, z):
    """Return the generator-side adversarial loss for latent codes ``z``.

    ``z`` is scored by ``self.discriminate`` and compared, via ``bce``,
    against a target of 1.

    Args:
        z: batch of latent codes. It is deliberately NOT detached, so the
            loss back-propagates into the models ``z`` has passed through.

    Returns:
        ``mean(bce(self.discriminate(z), 1))``.
    """
    p_fake = self.discriminate(z)
    # Build the target directly with the dtype/device of the predictions.
    ones = torch.ones_like(p_fake)
    return torch.mean(bce(p_fake, ones))
def objective_gan(fakeD, realD):
    """Return the ``bce`` loss of discriminator outputs against soft labels.

    Rows of ``fakeD`` are paired with the label ``1 - 1e-3`` and rows of
    ``realD`` with the label ``1e-3``; both are concatenated along dim 0
    before ``bce`` is applied.

    Args:
        fakeD: discriminator outputs for generated samples; must be
            compatible with a ``(fakeD.size(0), 1)`` label tensor.
        realD: discriminator outputs for real samples; must be compatible
            with a ``(realD.size(0), 1)`` label tensor.

    Returns:
        The value of ``bce(cat(fakeD, realD), labels)``.
    """
    labD = torch.cat((torch.ones(fakeD.size(0), 1) - 1e-3,
                      torch.zeros(realD.size(0), 1) + 1e-3))
    # Match the dtype/device of the predictions; otherwise ``bce`` fails as
    # soon as the discriminator runs on the GPU or in another precision.
    labD = labD.type_as(fakeD)
    return bce(torch.cat((fakeD, realD)), Variable(labD))