import torch
from torch.autograd import Variable  # legacy wrapper; a no-op on modern PyTorch


def spectral_learning(epoch):
    # One epoch of spectral learning. `model`, `optimizer`, `train_loader`,
    # `args`, `grad_F`, and `pseudo_inverse` are assumed to be defined at
    # module level.
    model.train()
    enum_train = enumerate(train_loader)
    for batch_idx, (data, Y) in enum_train:
        if args.cuda:
            data, Y = data.cuda(), Y.cuda()
        data, Y = Variable(data), Variable(Y)
        optimizer.zero_grad()
        F = model.forward(data)
        # Back-propagate an externally supplied gradient of the spectral
        # objective with respect to the network output F.
        G = grad_F(F, Y)
        F.backward(gradient=G)
        optimizer.step()
    # Objective on the last batch: Y.size(1) - trace(pinv(Y) F pinv(F) Y).
    objective_value = Y.size()[1] - torch.sum(
        torch.mm(pseudo_inverse(Y), F) * torch.mm(pseudo_inverse(F), Y).t())
    print("epoch %d --- loss value= %f" % (epoch, objective_value))
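
# Hedged usage sketch (illustrative only, not part of the original module):
# `pseudo_inverse` is not shown in this snippet; one plausible implementation is
# the Moore-Penrose pseudo-inverse, and `args.epochs` is an assumed attribute of
# the module-level `args` object.
def pseudo_inverse(X):
    # Moore-Penrose pseudo-inverse of a 2-D tensor.
    return torch.linalg.pinv(X)

for epoch in range(1, args.epochs + 1):
    spectral_learning(epoch)
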
def constraint(self):
    # Builds a pyOptSparse-style dict of constraint values, an objective, and
    # their gradients. Assumes `import numpy as np` and `import torch` at module
    # level; this is a method of a class exposing named_parameters(),
    # zero_grad(), self.model (the wrapped network), and
    # self.B = (inputs, targets, target derivatives, class labels).
    i, o, do, cl = self.B
    clsign = (2 * cl - 1)  # map {0, 1} class labels to {-1, +1} signs

    # Alternative (disabled): full Jacobian via a functional rebuild of the net.
    # def fun(*args):
    #     # self.model.load_state_dict(w)
    #     for p, pm in zip(self.model.parameters(), args):
    #         p.data = pm
    #     return self.model._net(i)
    # J = jacobian(fun, tuple(self.model.parameters()))

    self.zero_grad()
    # Per-parameter Jacobian of the data constraints, one row per sample.
    Jdata = {
        k: np.zeros((i.shape[0], p.flatten().shape[0]))
        for k, p in self.named_parameters() if p.requires_grad
    }
    ndim = i.shape[1]
    Jderiv = [{k: np.zeros_like(j) for k, j in Jdata.items()} for d in range(ndim)]
    o_ = torch.zeros(o.shape)
    do_ = torch.zeros(do.shape)
    deriv = torch.zeros(do.shape)

    for ind in range(i.shape[0]):
        ici = i[ind:ind + 1]
        ici.requires_grad = True
        oci = self.model(ici)
        o_[ind] = oci

        # Direct prediction: accumulate d(prediction)/d(parameters) per sample.
        oci.backward()
        # oci.backward(create_graph=True)
        for k, p in self.named_parameters():
            if p.requires_grad:
                Jdata[k][ind] = (clsign[ind] * p.grad).detach().flatten().numpy()
                p.grad *= 0

        # Derivative prediction (disabled): constraints on input derivatives.
        # for d in range(ndim):
        #     ici = i[ind:ind + 1]
        #     ici.requires_grad = True
        #     self.model.zero_grad()
        #     oci = self.model(ici)
        #     doci = grad(oci, [ici], create_graph=True)[0].squeeze()
        #     # do_[ind] = ici.grad.detach()
        #     do_[ind] = doci.detach()
        #     # doci = ici.grad.squeeze()
        #     # self.model.zero_grad()
        #     drv = (do[ind, d] ** 2 - doci[d] ** 2)
        #     deriv[ind, d] = drv
        #     drv.backward()
        #     # drv.backward(retain_graph=True)
        #     for k, p in self.named_parameters():
        #         if p.requires_grad:
        #             if p.grad is not None:
        #                 Jderiv[d][k][ind] = p.grad.flatten().numpy()
        #                 p.grad *= 0

    funcs = {
        # 'data' constraints: (prediction - target) scaled by the label sign.
        'data': ((o_ - o) * clsign).detach().squeeze().numpy(),
        'obj': 0.,
        'grads': {
            'data': Jdata,
            'obj': {k: Jdata[k][0] * 0 for k in Jdata}
        }
    }
    # for d in range(ndim):
    #     funcs['deriv_' + str(d)] = deriv[:, d].detach().squeeze().numpy()
    #     funcs['grads']['deriv_' + str(d)] = Jderiv[d]
    # Hinge-style loss: only positive (violated) constraint entries contribute.
    funcs['classification_loss'] = np.maximum(funcs['data'], 0).sum()

    # Objective: penalize weight matrices that are not partial isometries,
    # i.e. drive p @ p.T @ p towards p.
    F = 0
    self.model.zero_grad()
    for k, p in self.named_parameters():
        if p.requires_grad and 'weight' in k:
            # funcs['grads'][k] = {k: grad(f, [p])[0].detach().flatten().numpy()}
            F = F + (p @ p.T @ p - p).square().sum()
    F.backward()
    funcs['obj'] = F.detach().numpy()
    funcs['grads']['obj'] = {
        k: p.grad.detach().flatten().numpy()
        for k, p in self.named_parameters()
        if p.requires_grad and 'weight' in k
    }
    return funcs
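
# Hedged illustration (not part of the original module): the 'obj' term above,
# (p @ p.T @ p - p).square().sum(), vanishes exactly when a weight matrix is a
# partial isometry (all nonzero singular values equal 1), e.g. when its rows are
# orthonormal. The names below are illustrative only.
import torch

W_orth = torch.linalg.qr(torch.randn(5, 3)).Q.T       # 3x5 with orthonormal rows
print((W_orth @ W_orth.T @ W_orth - W_orth).square().sum().item())  # ~0
W_rand = torch.randn(3, 5)                             # generic matrix
print((W_rand @ W_rand.T @ W_rand - W_rand).square().sum().item())  # > 0
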
def roi_pooling0(input, rois, size=(7, 7), spatial_scale=1.0):
    # cffi version: `RoIPoolFunction` (from the compiled roi_pooling extension)
    # and the module-level `has_backward` flag are assumed to be imported or
    # defined elsewhere.
    F = RoIPoolFunction(size[0], size[1], spatial_scale)
    output = F(input, rois)
    if has_backward:
        # Drive the backward pass with a dummy gradient (e.g. for benchmarking).
        F.backward(output.data.clone())
    return output
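
# Hedged usage sketch (shapes and RoI format are assumptions, not taken from the
# snippet above): cffi RoI pooling implementations typically expect `rois` as
# rows of (batch_index, x1, y1, x2, y2) in input-image coordinates, with
# `spatial_scale` mapping those coordinates onto the feature map.
import torch

feat = torch.randn(1, 256, 32, 32)                     # NCHW feature map
rois = torch.tensor([[0., 0., 0., 100., 100.]])        # one RoI on image 0
pooled = roi_pooling0(feat, rois, size=(7, 7), spatial_scale=1.0 / 16)
# pooled is expected to have shape (num_rois, 256, 7, 7)
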