def __init__(self, args):
    self.s = args.s
    self.z = args.z
    self.batch_size = args.batch_size
    self.epochs = 200
    self.alpha = 1
    self.beta = args.beta
    self.target = args.target
    self.use_bn = args.use_bn
    self.bias = args.bias
    self.n_hidden = args.n_hidden
    self.pretrain_e = args.pretrain_e
    self.dataset = args.dataset
    self.test_ensemble = args.test_ensemble
    self.test_uncertainty = args.test_uncertainty
    self.vote = args.vote
    self.device = torch.device('cuda')
    torch.manual_seed(8734)
    self.hypergan = HyperGAN(args, self.device)
    self.hypergan.print_hypergan()
    self.hypergan.attach_optimizers(5e-3, 1e-4, 5e-5)
    if self.dataset == 'mnist':
        self.data_train, self.data_test = datagen.load_mnist()
    elif self.dataset == 'cifar':
        self.data_train, self.data_test = datagen.load_cifar()
    self.best_test_acc = 0.
    self.best_test_loss = np.inf
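# Hedged usage sketch (not part of the repository): build an `args` namespace with
# the fields this constructor reads and kick off training. The class name
# `HyperGANTrainer`, the `train()` entry point, and every default value below are
# assumptions for illustration only.
from argparse import Namespace

args = Namespace(
    s=256, z=128, batch_size=32, beta=1.0, target=None,
    use_bn=True, bias=True, n_hidden=1, pretrain_e=True,
    dataset='mnist', test_ensemble=True, test_uncertainty=False, vote=False)
trainer = HyperGANTrainer(args)   # hypothetical class wrapping the __init__ above
trainer.train()                   # hypothetical entry point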
def _run_anomaly_mnist(args, hypernet):
    arch = get_network(args)
    train, test = datagen.load_mnist(args)
    _vars, _stds, _ents = [], [], []
    model = sample_model(hypernet, arch)
    for n in [5, 10, 100]:
        for idx, (data, target) in enumerate(test):
            data, target = data.cuda(), target.cuda()
            pred_labels = []
            for _ in range(n):
                model = sample_model(hypernet, arch)
                output = model(data)
                pred = output.data.max(1, keepdim=True)[1]
                pred_labels.append(pred.view(pred.numel()))
            p_labels = torch.stack(pred_labels).float().transpose(0, 1)
            _vars.append(p_labels.var(1).mean())
            _stds.append(p_labels.std(1).mean())
            # move to CPU/numpy before handing the labels to scipy's entropy
            _ents.append(np.apply_along_axis(entropy, 1, p_labels.cpu().numpy()))
        plot_empirical_cdf(args, _ents, n)
        print('mean var: {}, max var: {}, min var: {}, std: {}'.format(
            torch.tensor(_vars).mean(), torch.tensor(_vars).max(),
            torch.tensor(_vars).min(), torch.tensor(_stds).mean()))
def train(args, model, grad=False):
    train_loader, _ = datagen.load_mnist(args)
    train_loss, train_acc = 0., 0.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    for epoch in range(args.epochs):
        model.train()
        correct = 0.
        train_loss = 0.
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            pred = output.data.max(1, keepdim=True)[1]  # index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
            train_loss += loss.item()  # accumulate as a float so the graph is freed
        train_loss /= len(train_loader.dataset)
        acc = (correct.float() / len(train_loader.dataset)).item()
        print('train_acc: {}, train loss: {}'.format(acc, train_loss))
    acc, loss = test(args, model)
    return acc, loss
def test_mnist(args, Z, names, arch):
    _, test_loader = datagen.load_mnist(args)
    criterion = nn.CrossEntropyLoss()
    pop_size = args.batch_size
    with torch.no_grad():
        correct = 0.
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for i in range(pop_size):
                params = [Z[0][i], Z[1][i], Z[2][i]]
                model = weights_to_clf(params, names, arch)
                output = model(data)
                outputs.append(output)
            pop_outputs = torch.stack(outputs)
            # majority-vote labels (assumes a test batch size of 100); note that
            # the accuracy and loss below are computed from the last sampled model
            pop_labels = pop_outputs.max(2, keepdim=True)[1].view(pop_size, 100, 1)
            modes = torch.mode(pop_labels, dim=0, keepdim=True)[0].view(100, )
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
        test_loss /= len(test_loader.dataset)
        acc = (correct.float() / len(test_loader.dataset)).item()
    return acc, test_loss
def train(args, model, grad=False):
    if args.dataset == 'mnist':
        train_loader, _ = datagen.load_mnist(args)
    elif args.dataset == 'fashion_mnist':
        train_loader, _ = datagen.load_fashion_mnist(args)
    train_loss, train_acc = 0., 0.
    criterion = nn.CrossEntropyLoss()
    if args.ft:
        for child in list(model.children())[:2]:
            print('removing {}'.format(child))
            for param in child.parameters():
                param.requires_grad = False
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)
    for epoch in range(args.epochs):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        acc, loss = test(args, model, epoch)
    return acc, loss
def test(args, model, epoch=None, grad=False):
    model.eval()
    if args.dataset == 'mnist':
        _, test_loader = datagen.load_mnist(args)
    elif args.dataset == 'fashion_mnist':
        _, test_loader = datagen.load_fashion_mnist(args)
    test_loss = 0
    correct = 0.
    criterion = nn.CrossEntropyLoss()
    for data, target in test_loader:
        data, target = data.cuda(), target.cuda()
        output = model(data)
        if grad is False:
            test_loss += criterion(output, target).item()  # sum up batch loss
        else:
            test_loss += criterion(output, target)
        pred = output.data.max(1, keepdim=True)[1]  # index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
    test_loss /= len(test_loader.dataset)
    acc = (correct.float() / len(test_loader.dataset)).item()
    print(acc)
    if epoch:
        print('Average loss: {}, Accuracy: {}/{} ({}%)'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    return acc, test_loss
def eval_mnist_ensemble(ensemble, outlier=False):
    for model in ensemble:
        model.eval()
    if outlier is True:
        trainloader, testloader = datagen.load_notmnist()
    else:
        trainloader, testloader = datagen.load_mnist()
    model_outputs = torch.zeros(len(ensemble), len(testloader.dataset), 10)
    for i, (data, target) in enumerate(testloader):
        data = data.cuda()
        target = target.cuda()
        outputs = []
        for model in ensemble:
            outputs.append(model(data))
        outputs = torch.stack(outputs)
        model_outputs[:, i * len(data):(i + 1) * len(data), :] = outputs
    # Soft voting (entropy of the mean confidence)
    probs_soft = F.softmax(model_outputs, dim=-1)      # [ens, data, 10]
    preds_soft = probs_soft.mean(0)                    # [data, 10]
    entropy = entropy_fn(preds_soft.T.cpu().numpy())   # [data]
    # Hard voting (variance of the predicted classes)
    probs_hard = F.softmax(model_outputs, dim=-1)      # [ens, data, 10]
    preds_hard = probs_hard.var(0).cpu()               # [data, 10]
    variance = preds_hard.sum(1).numpy()               # [data]
    for model in ensemble:
        model.train()
    return entropy, variance
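# Hedged sketch (illustrative addition): turn the entropy scores returned by
# eval_mnist_ensemble into a single out-of-distribution AUROC. sklearn is an
# extra dependency not required by the functions above.
import numpy as np
from sklearn.metrics import roc_auc_score

def ood_auroc(ensemble):
    ent_in, _ = eval_mnist_ensemble(ensemble, outlier=False)   # MNIST test set
    ent_out, _ = eval_mnist_ensemble(ensemble, outlier=True)   # notMNIST test set
    scores = np.concatenate([ent_in, ent_out])
    labels = np.concatenate([np.zeros_like(ent_in), np.ones_like(ent_out)])
    # higher predictive entropy should flag outliers, so entropy is used as the score
    return roc_auc_score(labels, scores)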
def eval_mnist_hypergan(hypergan, ens_size, s_dim, outlier=False):
    hypergan.eval_()
    if outlier is True:
        trainloader, testloader = datagen.load_notmnist()
    else:
        trainloader, testloader = datagen.load_mnist()
    model_outputs = torch.zeros(ens_size, len(testloader.dataset), 10)
    for i, (data, target) in enumerate(testloader):
        data = data.cuda()
        target = target.cuda()
        z = torch.randn(ens_size, s_dim).to(hypergan.device)
        codes = hypergan.mixer(z)
        params = hypergan.generator(codes)
        outputs = []
        for layers in zip(*params):
            output = hypergan.eval_f(layers, data)
            outputs.append(output)
        outputs = torch.stack(outputs)
        model_outputs[:, i * len(data):(i + 1) * len(data), :] = outputs
    # Soft voting (entropy of the mean confidence)
    probs_soft = F.softmax(model_outputs, dim=-1)      # [ens, data, 10]
    preds_soft = probs_soft.mean(0)                    # [data, 10]
    entropy = entropy_fn(preds_soft.T.cpu().numpy())   # [data]
    # Hard voting (variance of the predicted classes)
    probs_hard = F.softmax(model_outputs, dim=-1)      # [ens, data, 10]
    preds_hard = probs_hard.var(0).cpu()               # [data, 10]
    variance = preds_hard.sum(1).numpy()               # [data]
    hypergan.train_()
    return entropy, variance
def test_ent(args, model, grad=False, epoch=None):
    from scipy.stats import entropy
    _, test_loader = datagen.load_mnist(args)
    runs = []
    ent_runs = []
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        correct = 0.
        preds = []
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for _ in range(100):
                output = model(data)
                outputs.append(output)
            if grad is False:
                test_loss += criterion(output, target).item()  # sum up batch loss
            else:
                test_loss += criterion(output, target)
            pred = output.data.max(1, keepdim=True)[1]  # index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
            preds.append(F.softmax(torch.stack(outputs).mean(0), dim=1))
        test_loss /= len(test_loader.dataset)
        acc = (correct.float() / len(test_loader.dataset)).item()
        preds = torch.stack(preds).view(-1, 10)
        print(preds)
        preds = preds.cpu().detach().numpy()
        print('PREDS: ', preds.shape)
        ent = entropy(preds.T)
        print('ENT: ', ent, ent.shape, ent.mean())
        """
        def plot_e(a):
            ecdf = sm.distributions.ECDF(a)
            x = np.linspace(min(a), max(a))
            y = ecdf(x)
            return x, y

        a1, b1 = plot_e(ent)
        plt.plot(a1, b1, label='inlier dropout')
        plt.grid(True)
        plt.xlabel('Entropy')
        plt.show()
        """
        if epoch:
            print('Average loss: {}, Accuracy: {}/{} ({}%)'.format(
                test_loss, correct, len(test_loader.dataset),
                100. * correct / len(test_loader.dataset)))
    return acc, test_loss
def load_data(args):
    if args.dataset == 'mnist':
        return datagen.load_mnist(args)
    if args.dataset == 'cifar':
        return datagen.load_cifar(args)
    if args.dataset == 'fmnist':
        return datagen.load_fashion_mnist(args)
    if args.dataset == 'cifar_hidden':
        class_list = [0]  # just load class 0
        return datagen.load_cifar_hidden(args, class_list)
    else:
        print('Dataset not specified correctly')
        print('choose --dataset <mnist, fmnist, cifar, cifar_hidden>')
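# Hedged call-site sketch: only the `dataset` field is certain to be read by
# load_data; `batch_size` is an assumption about what the datagen loaders expect.
from argparse import Namespace

args = Namespace(dataset='mnist', batch_size=32)
loaders = load_data(args)
if loaders is None:  # load_data falls through (returns None) on an unknown dataset
    raise ValueError('unknown dataset: {}'.format(args.dataset))
train_loader, test_loader = loaders
print(len(train_loader.dataset), len(test_loader.dataset))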
def run_adv_hyper(args, hypernet):
    arch = get_network(args)
    arch.lnames = args.stat['layer_names']
    model_base, fmodel_base = sample_fmodel(hypernet, arch)
    fgs = foolbox.attacks.HyperBIM(fmodel_base)
    _, test_loader = datagen.load_mnist(args)
    adv, y = [], []
    for eps in [0.2, .3, 1.0]:
        total_adv = 0
        acc, _accs, _vars, _stds = [], [], [], []
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.cuda(), target.cuda()
            adv_batch, target_batch, _ = attack_batch_hyper(
                data, target, fmodel_base, eps, fgs, hypernet, arch)
            if adv_batch is None:
                continue
            output = model_base(adv_batch)
            pred = output.data.max(1, keepdim=True)[1]
            correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
            n_adv = len(target_batch) - correct.item()
            total_adv += n_adv
            padv = np.argmax(fmodel_base.predictions(adv_batch[0].cpu().numpy()))
            sample_adv, pred_labels = [], []
            for _ in range(10):
                model, fmodel = sample_fmodel(hypernet, arch)
                output = model(adv_batch)
                pred = output.data.max(1, keepdim=True)[1]
                correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                acc.append(correct.item())
                n_adv_sample = len(target_batch) - correct.item()
                sample_adv.append(n_adv_sample)
                pred_labels.append(pred.view(pred.numel()))
            p_labels = torch.stack(pred_labels).float().transpose(0, 1)
            acc = torch.tensor(acc, dtype=torch.float)
            _accs.append(torch.mean(acc))
            _vars.append(p_labels.var(1).mean())
            _stds.append(p_labels.std(1).mean())
            acc, adv, y = [], [], []
        print('Eps: {}, Adv: {}/{}, var: {}, std: {}'.format(
            eps, total_adv, len(test_loader.dataset),
            torch.tensor(_vars).mean(), torch.tensor(_stds).mean()))
def test_acc_single(args, Z, names, arch, pop_size):
    _, test_loader = datagen.load_mnist(args)
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        correct = 0.
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for i in range(pop_size):
                params = [Z[0][i], Z[1][i], Z[2][i]]
                model = weights_to_clf(params, names, arch)
                output = model(data)
                outputs.append(output)
            pop_outputs = F.softmax(torch.stack(outputs).mean(0), dim=1)
            pop_outputs = pop_outputs.view(-1, 10)
            pred = pop_outputs.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
        # loss is over one network only; don't back-calculate the loss from indexes
        acc = (correct.float() / len(test_loader.dataset)).item()
    return acc, 0
def measure_acc(args, hypernet, arch):
    _, test_loader = datagen.load_mnist(args)
    test_loss = 0
    correct = 0.
    criterion = nn.CrossEntropyLoss()
    e1, e5, e10, e100 = 0., 0., 0., 0.
    for n in [1, 5, 10, 100]:
        test_acc = 0.
        test_loss = 0.
        weights = utils.sample_hypernet(hypernet, n)
        for i, (data, y) in enumerate(test_loader):
            data = data.cuda()  # generated weights live on the GPU
            n_votes = []
            for k in range(n):
                sample_w = (weights[0][k], weights[1][k], weights[2][k])
                model = utils.weights_to_clf(sample_w, arch, args.stat['layer_names'])
                votes = model(data).data.max(1)[1]  # predicted class per image
                n_votes.append(votes.cpu().numpy())
            votes = np.array(n_votes)
            vote_modes = stats.mode(votes, axis=0)[0]
            vote_modes = torch.tensor(vote_modes)
            if n == 1:
                e1 += vote_modes.eq(y.data.view_as(vote_modes)).long().cpu().sum()
            elif n == 5:
                e5 += vote_modes.eq(y.data.view_as(vote_modes)).long().cpu().sum()
            elif n == 10:
                e10 += vote_modes.eq(y.data.view_as(vote_modes)).long().cpu().sum()
            elif n == 100:
                e100 += vote_modes.eq(y.data.view_as(vote_modes)).long().cpu().sum()
    test_loss /= len(test_loader.dataset) * args.batch_size
    test_acc /= len(test_loader.dataset) * args.batch_size
    e1 = e1.item() / len(test_loader.dataset)
    e5 = e5.item() / len(test_loader.dataset)
    e10 = e10.item() / len(test_loader.dataset)
    e100 = e100.item() / len(test_loader.dataset)
    print('Test Accuracy: {}, Test Loss: {}'.format(test_acc, test_loss))
def run_adv_model(args, models):
    fmodels = [attacks.load_model(model) for model in models]
    criterion = Misclassification()
    fgs = foolbox.attacks.HyperBIM(fmodels[0])
    _, test_loader = datagen.load_mnist(args)
    adv, y, inter = [], [], []
    acc, _accs = [], []
    total_adv, total_correct = 0, 0
    missed = 0
    for eps in [0.01, 0.03, 0.08, 0.1, .2, .3, 1]:
        total_adv = 0
        _accs, _vars, _stds = [], [], []
        pred_labels = []
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            adv_batch, target_batch, _ = attack_batch_ensemble(data, target, eps, fgs, fmodels)
            if adv_batch is None:
                continue
            n_adv = 0.
            acc, pred_labels = [], []
            output = ensemble_prediction(models, adv_batch)
            for i in range(5):
                pred = output[i].data.max(1, keepdim=True)[1]
                correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                n_adv += len(target_batch) - correct.item()
                pred_labels.append(pred.view(pred.numel()))
            ens_pred = output.mean(0).data.max(1, keepdim=True)[1]
            ens_correct = ens_pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
            total_adv += len(target_batch) - ens_correct.item()
            p_labels = torch.stack(pred_labels).float().transpose(0, 1)
            _vars.append(p_labels.var(1).mean())
            _stds.append(p_labels.std(1).mean())
            acc, adv, y = [], [], []
        print('Eps: {}, Adv: {}/{}, var: {}, std: {}'.format(
            eps, total_adv, len(test_loader.dataset),
            torch.tensor(_vars).mean(), torch.tensor(_stds).mean()))
def __init__(self, args):
    self.lr = args.lr
    self.wd = args.wd
    self.epochs = 200
    self.dataset = args.dataset
    self.test_uncertainty = args.test_uncertainty
    self.vote = args.vote
    self.device = torch.device('cuda')
    torch.manual_seed(8734)
    self.model = models.LeNet_Dropout().to(self.device)
    self.optimizer = torch.optim.Adam(self.model.parameters(), self.lr,
                                      weight_decay=self.wd)
    if self.dataset == 'mnist':
        self.data_train, self.data_test = datagen.load_mnist()
    elif self.dataset == 'cifar':
        self.data_train, self.data_test = datagen.load_cifar()
    self.best_test_acc = 0.
    self.best_test_loss = np.inf
    print(self.model)
def run_anomaly_mnist(args, hypernet):
    arch = get_network(args)
    train, test = datagen.load_mnist(args)
    _vars, _stds, _ents = [], [], []
    model = sample_model(hypernet, arch)
    for n in [5, 10, 100]:
        for idx, (data, target) in enumerate(test):
            data, target = data.cuda(), target.cuda()
            pred_labels = []
            logits = []
            for _ in range(n):
                model = sample_model(hypernet, arch)
                output = model(data)
                logits.append(output)
            # entropy is taken over the softmax of the mean logits
            probs = F.softmax(torch.stack(logits).mean(0), dim=1)
            _ents.append(np.apply_along_axis(E, 1, probs.detach().cpu().numpy()))
        plot_empirical_cdf(args, _ents, n)
        print('mean E: {}, max E: {}, min E: {}'.format(
            torch.tensor(_ents).mean(), torch.tensor(_ents).max(),
            torch.tensor(_ents).min()))
def test_ensemble(args, models, pop_size):
    _, test_loader = datagen.load_mnist(args)
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        correct = 0.
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for model in models:
                output = model(data)
                outputs.append(output)
            pop_outputs = torch.stack(outputs)
            # assumes a test batch size of 100
            pop_labels = pop_outputs.max(2, keepdim=True)[1].view(pop_size, 100, 1)
            modes, idxs = torch.mode(pop_labels, dim=0, keepdim=True)
            modes = modes.view(100, 1)
            correct += modes.eq(target.data.view_as(modes)).long().cpu().sum()
            # loss is over one network only; don't back-calculate the loss from vote indexes
            test_loss += criterion(output, target).item()
        test_loss /= len(test_loader.dataset)
        acc = (correct.float() / len(test_loader.dataset)).item()
    return acc, test_loss
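# Hedged usage sketch (illustrative, not from the repository): load a small
# ensemble of trained LeNets and score the majority vote with test_ensemble.
# The checkpoint paths are hypothetical, and `args` is assumed to carry the
# batch size of 100 that the voting reshape above expects.
def eval_saved_ensemble(args, n_members=5):
    ensemble = [models.LeNet().cuda() for _ in range(n_members)]
    for i, model in enumerate(ensemble):
        state = torch.load('saved_models/lenet_{}.pt'.format(i))  # hypothetical path
        model.load_state_dict(state)
        model.eval()
    acc, loss = test_ensemble(args, ensemble, pop_size=n_members)
    print('ensemble vote accuracy: {:.4f}, loss: {:.6f}'.format(acc, loss))
    return acc, loss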
def test(args, model):
    from scipy.stats import entropy
    _, test_loader = datagen.load_mnist(args)
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        correct = 0.
        preds = []
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = output.data.max(1, keepdim=True)[1]  # index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
        test_loss /= len(test_loader.dataset)
        acc = (correct.float() / len(test_loader.dataset)).item()
    print('Average loss: {}, Accuracy: {}/{} ({}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return acc, test_loss
def test_f(args, path, model, n):
    def clf(data, target, Z):
        data, target = data.cuda(), target.cuda()
        out = F.conv2d(data, Z[0], stride=1)
        out = F.relu(out)
        out = F.max_pool2d(out, 2, 2)
        out = F.conv2d(out, Z[1], stride=1)
        out = F.relu(out)
        out = F.max_pool2d(out, 2, 2)
        out = out.view(-1, 1024)
        out = F.linear(out, Z[2])
        out = F.relu(out)
        out = F.linear(out, Z[3])
        return out

    netE, W1, W2, W3, W4 = model
    x_dist = utils.create_d(args.ze)
    _, test_loader = datagen.load_mnist(args)
    correct = 0
    z = utils.sample_d(x_dist, n)
    codes = netE(z)
    l1 = W1(codes[0])
    l2 = W2(codes[1])
    l3 = W3(codes[2])
    l4 = W4(codes[3])
    for i, (data, target) in enumerate(test_loader):
        data = data.cuda()
        target = target.cuda()
        outputs = []
        for (g1, g2, g3, g4) in zip(l1, l2, l3, l4):
            output = clf(data, target, [g1, g2, g3, g4])
            outputs.append(output)
        pop_outputs = torch.stack(outputs)
        pop_labels = pop_outputs.max(2, keepdim=True)[1].view(n, 100, 1)
        modes, idxs = torch.mode(pop_labels, dim=0, keepdim=True)
        modes = modes.view(100, 1)
        correct += modes.eq(target.data.view_as(modes)).long().cpu().sum()
    return correct.float().item() / 10000.
def test_ent(args, Z, names, arch, pop_size, ds):
    if ds == 'mnist':
        _, test_loader = datagen.load_mnist(args)
    if ds == 'notmnist':
        _, test_loader = datagen.load_notmnist(args)
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        correct = 0.
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for i in range(pop_size):
                params = [Z[0][i], Z[1][i], Z[2][i]]
                model = weights_to_clf(params, names, arch)
                output = F.softmax(model(data), dim=1)
                outputs.append(output)
        # assumes the full 10k test set arrives in a single batch
        pop_outputs = torch.stack(outputs)
        pop_outputs = pop_outputs.view(pop_size, 10000, 10)
        pop_mean = pop_outputs.mean(0).view(10000, 10)
        ent = entropy(pop_mean.cpu().numpy().T)
    return ent
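# Hedged plotting sketch: overlay empirical CDFs of the predictive entropy on
# MNIST (inlier) and notMNIST (outlier), mirroring the commented-out ECDF code
# further up. matplotlib only; no claim that this matches the repository's own
# plotting utilities.
import numpy as np
import matplotlib.pyplot as plt

def plot_entropy_cdfs(args, Z, names, arch, pop_size):
    ent_in = test_ent(args, Z, names, arch, pop_size, ds='mnist')
    ent_out = test_ent(args, Z, names, arch, pop_size, ds='notmnist')
    for ent, label in [(ent_in, 'MNIST (inlier)'), (ent_out, 'notMNIST (outlier)')]:
        x = np.sort(ent)
        y = np.arange(1, len(x) + 1) / len(x)  # empirical CDF
        plt.plot(x, y, label=label)
    plt.xlabel('Entropy')
    plt.ylabel('Empirical CDF')
    plt.grid(True)
    plt.legend()
    plt.show()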
def __init__(self, args):
    self.lr = args.lr
    self.wd = args.wd
    self.epochs = 200
    self.dataset = args.dataset
    self.test_uncertainty = args.test_uncertainty
    self.vote = args.vote
    self.n_models = args.n_models
    self.device = torch.device('cuda')
    torch.manual_seed(8734)
    self.ensemble = [
        models.LeNet().to(self.device) for _ in range(self.n_models)
    ]
    self.attach_optimizers()
    if self.dataset == 'mnist':
        self.data_train, self.data_test = datagen.load_mnist()
    elif self.dataset == 'cifar':
        self.data_train, self.data_test = datagen.load_cifar()
    self.best_test_acc = 0.
    self.best_test_loss = np.inf
    print(self.ensemble[0], ' X {}'.format(self.n_models))
def train(args):
    torch.manual_seed(1)
    netE = models.Encoderz(args).cuda()
    W1 = models.GeneratorW1(args).cuda()
    W2 = models.GeneratorW2(args).cuda()
    W3 = models.GeneratorW3(args).cuda()
    netD = models.DiscriminatorQz(args).cuda()
    print(netE, W1, W2, W3)

    if args.resume is not None:
        d = torch.load(args.resume)
        netE = utils.load_net_only(netE, d['E'])
        netD = utils.load_net_only(netD, d['D'])
        W1 = utils.load_net_only(W1, d['W1'])
        W2 = utils.load_net_only(W2, d['W2'])
        W3 = utils.load_net_only(W3, d['W3'])

    optimE = optim.Adam(netE.parameters(), lr=args.lr, betas=(0.5, 0.9), weight_decay=5e-4)
    optimW1 = optim.Adam(W1.parameters(), lr=args.lr, betas=(0.5, 0.9), weight_decay=5e-4)
    optimW2 = optim.Adam(W2.parameters(), lr=args.lr, betas=(0.5, 0.9), weight_decay=5e-4)
    optimW3 = optim.Adam(W3.parameters(), lr=args.lr, betas=(0.5, 0.9), weight_decay=5e-4)
    optimD = optim.Adam(netD.parameters(), lr=args.lr, betas=(0.5, 0.9), weight_decay=5e-4)

    best_test_acc, best_clf_acc, best_test_loss = 0., 0., np.inf
    args.best_loss, args.best_acc = best_test_loss, best_test_acc
    args.best_clf_loss, args.best_clf_acc = np.inf, 0

    mnist_train, mnist_test = datagen.load_mnist(args)
    x_dist = utils.create_d(args.ze)
    z_dist = utils.create_d(args.z)
    qz_dist = utils.create_d(args.z * 3)
    one = torch.tensor(1.).cuda()
    mone = one * -1

    print("==> pretraining encoder")
    j = 0
    final = 100.
    e_batch_size = 1000
    if args.resume is None:
        if args.pretrain_e is True:
            for j in range(1000):
                x = utils.sample_d(x_dist, e_batch_size)
                z = utils.sample_d(z_dist, e_batch_size)
                codes = torch.stack(netE(x)).view(-1, args.z * 3)
                qz = utils.sample_d(qz_dist, e_batch_size)
                mean_loss, cov_loss = ops.pretrain_loss(codes, qz)
                loss = mean_loss + cov_loss
                loss.backward()
                optimE.step()
                netE.zero_grad()
                print('Pretrain Enc iter: {}, Mean Loss: {}, Cov Loss: {}'.format(
                    j, mean_loss.item(), cov_loss.item()))
                final = loss.item()
                if loss.item() < 0.1:
                    print('Finished Pretraining Encoder')
                    break

    print('==> Begin Training')
    for _ in range(args.epochs):
        for batch_idx, (data, target) in enumerate(mnist_train):
            z = utils.sample_d(x_dist, args.batch_size)
            ze = utils.sample_d(z_dist, args.batch_size)
            qz = utils.sample_d(qz_dist, args.batch_size)
            codes = netE(z)
            noise = utils.sample_d(qz_dist, args.batch_size)
            log_pz = ops.log_density(ze, 2).view(-1, 1)
            d_loss, d_q = ops.calc_d_loss(args, netD, ze, codes, log_pz)
            optimD.zero_grad()
            d_loss.backward(retain_graph=True)
            optimD.step()

            l1_w, l1_b = W1(codes[0])
            l2_w, l2_b = W2(codes[1])
            l3_w, l3_b = W3(codes[2])
            clf_loss = 0
            for (g1_w, g1_b, g2_w, g2_b, g3_w, g3_b) in zip(l1_w, l1_b, l2_w, l2_b, l3_w, l3_b):
                g1 = (g1_w, g1_b)
                g2 = (g2_w, g2_b)
                g3 = (g3_w, g3_b)
                loss, correct = train_clf(args, [g1, g2, g3], data, target)
                clf_loss += loss
            G_loss = clf_loss / args.batch_size  # * args.beta
            one_qz = torch.ones((args.batch_size * 3, 1), requires_grad=True).cuda()
            log_qz = ops.log_density(torch.ones(args.batch_size * 3, 1), 2).view(-1, 1)
            Q_loss = F.binary_cross_entropy_with_logits(d_q + log_qz, one_qz)
            total_hyper_loss = Q_loss + G_loss  # + (gp.sum().cuda())  # mean().cuda()
            total_hyper_loss.backward()

            optimE.step()
            optimW1.step()
            optimW2.step()
            optimW3.step()
            optimE.zero_grad()
            optimW1.zero_grad(), optimW2.zero_grad(), optimW3.zero_grad()

            total_loss = total_hyper_loss.item()

            if batch_idx % 50 == 0:
                acc = correct
                print('**************************************')
                print('Iter: {}'.format(len(logger['acc'])))
                print('Acc: {}, MD Loss: {}, D loss: {}'.format(
                    acc, total_hyper_loss, d_loss))
                # print('penalties: ', gp[0].item(), gp[1].item(), gp[2].item())
                # print('grads: ', grads)
                print('best test loss: {}'.format(args.best_loss))
                print('best test acc: {}'.format(args.best_acc))
                # print('best clf acc: {}'.format(args.best_clf_acc))
                print('**************************************')

            if batch_idx > 1 and batch_idx % 100 == 0:
                test_acc = 0.
                test_loss = 0.
                with torch.no_grad():
                    for i, (data, y) in enumerate(mnist_test):
                        z = utils.sample_d(x_dist, args.batch_size)
                        codes = netE(z)
                        l1_w, l1_b = W1(codes[0])
                        l2_w, l2_b = W2(codes[1])
                        l3_w, l3_b = W3(codes[2])
                        for (g1_w, g1_b, g2_w, g2_b, g3_w, g3_b) in zip(
                                l1_w, l1_b, l2_w, l2_b, l3_w, l3_b):
                            g1 = (g1_w, g1_b)
                            g2 = (g2_w, g2_b)
                            g3 = (g3_w, g3_b)
                            loss, correct = train_clf(args, [g1, g2, g3], data, y)
                            test_acc += correct.item()
                            test_loss += loss.item()
                test_loss /= len(mnist_test.dataset) * args.batch_size
                test_acc /= len(mnist_test.dataset) * args.batch_size
                print('Test Accuracy: {}, Test Loss: {}'.format(test_acc, test_loss))
                # print('Clf Accuracy: {}, Clf Loss: {}'.format(clf_acc, clf_loss))
                if test_loss < best_test_loss:
                    best_test_loss, args.best_loss = test_loss, test_loss
                if test_acc > best_test_acc:
                    # best_clf_acc, args.best_clf_acc = clf_acc, clf_acc
                    utils.save_hypernet_mnist(args, [netE, netD, W1, W2, W3], test_acc)
                if test_acc > best_test_acc:
                    best_test_acc, args.best_acc = test_acc, test_acc
def train(args):
    torch.manual_seed(8734)
    netG = Generator(args).cuda()
    netD = Discriminator(args).cuda()
    print(netG, netD)
    optimG = optim.Adam(netG.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimD = optim.Adam(netD.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)

    mnist_train, mnist_test = datagen.load_mnist(args)
    train = inf_gen(mnist_train)
    print('saving reals')
    reals, _ = next(train)
    if not os.path.exists('results/'):
        os.makedirs('results')
    save_image(reals, 'results/reals.png')

    one = torch.tensor(1.).cuda()
    mone = (one * -1)
    print('==> Begin Training')
    for iter in range(args.epochs):
        ops.batch_zero_grad([netG, netD])
        for p in netD.parameters():
            p.requires_grad = True
        for _ in range(args.disc_iters):
            data, targets = next(train)
            data = data.view(args.batch_size, 28 * 28).cuda()
            netD.zero_grad()
            d_real = netD(data).mean()
            d_real.backward(mone, retain_graph=True)
            noise = torch.randn(args.batch_size, args.z, requires_grad=True).cuda()
            with torch.no_grad():
                fake = netG(noise)
            fake.requires_grad_(True)
            d_fake = netD(fake)
            d_fake = d_fake.mean()
            d_fake.backward(one, retain_graph=True)
            gp = ops.grad_penalty_1dim(args, netD, data, fake)
            gp.backward()
            d_cost = d_fake - d_real + gp
            wasserstein_d = d_real - d_fake
            optimD.step()

        for p in netD.parameters():
            p.requires_grad = False
        netG.zero_grad()
        noise = torch.randn(args.batch_size, args.z, requires_grad=True).cuda()
        fake = netG(noise)
        G = netD(fake)
        G = G.mean()
        G.backward(mone)
        g_cost = -G
        optimG.step()

        if iter % 100 == 0:
            print('iter: ', iter, 'train D cost', d_cost.cpu().item())
            print('iter: ', iter, 'train G cost', g_cost.cpu().item())
        if iter % 300 == 0:
            val_d_costs = []
            for i, (data, target) in enumerate(mnist_test):
                data = data.cuda()
                d = netD(data)
                val_d_cost = -d.mean().item()
                val_d_costs.append(val_d_cost)
            utils.generate_image(args, iter, netG)
def train_gan(args):
    netG = init(Generator(args)).cuda()
    netD = Discriminator(args).cuda()
    optimG = optim.Adam(netG.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimD = optim.Adam(netD.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)

    mnist_train, mnist_test = datagen.load_mnist(args)
    train = inf_gen(mnist_train)
    print('saving reals')
    reals, _ = next(train)
    utils.save_images(reals.detach().cpu().numpy(), 'gan_training/reals.png')

    one = torch.tensor(1.).cuda()
    mone = one * -1
    args.batch_size = 32
    args.gan = True
    print('==> Begin Training')
    for iter in range(args.epochs):
        netG.zero_grad()
        netD.zero_grad()
        for p in netD.parameters():
            p.requires_grad = True
        for _ in range(5):
            data, targets = next(train)
            data = data.view(args.batch_size, 28 * 28).cuda()
            netD.zero_grad()
            d_real = netD(data).mean()
            d_real.backward(mone, retain_graph=True)
            noise = torch.randn(args.batch_size, args.z, requires_grad=True).cuda()
            fake = []
            with torch.no_grad():
                fake = sample(args, netG, noise, gan=True).view(32, -1)
            fake.requires_grad_(True)
            d_fake = netD(fake)
            d_fake = d_fake.mean()
            d_fake.backward(one, retain_graph=True)
            gp = ops.grad_penalty_1dim(args, netD, data, fake)
            gp.backward()
            d_cost = d_fake - d_real + gp
            wasserstein_d = d_real - d_fake
            optimD.step()

        for p in netD.parameters():
            p.requires_grad = False
        netG.zero_grad()
        noise = torch.randn(args.batch_size, args.z, requires_grad=True).cuda()
        fake = []
        for z in noise:
            fake.append(sample(args, netG, noise, gan=True))
        fake = torch.stack(fake)
        G = netD(fake)
        G = G.mean()
        G.backward(mone)
        g_cost = -G
        optimG.step()

        if iter % 100 == 0:
            with torch.no_grad():
                noise = torch.randn(args.batch_size, args.z, requires_grad=True).cuda()
                samples = sample(args, netG, noise, gan=True)
                samples = samples.view(-1, 28, 28).cpu().data.numpy()
                path = 'gan_training/gan_sample_{}.png'.format(iter)
                print('saving gan sample: ', path)
                utils.save_images(samples, path)
                args.gan = False
                args.batch_size = 1  # accommodate large images
                cppn(args, netG, iter, noise[:args.n])
                args.batch_size = 32
                args.gan = True
            print('iter: ', iter, 'G cost', g_cost.cpu().item())
            print('iter: ', iter, 'D cost', d_cost.cpu().item())
def train(args):
    from torch import optim
    # torch.manual_seed(8734)
    netE = models.Encoderz(args).cuda()
    netD = models.DiscriminatorZ(args).cuda()
    E1 = models.GeneratorE1(args).cuda()
    E2 = models.GeneratorE2(args).cuda()
    # E3 = models.GeneratorE3(args).cuda()
    # E4 = models.GeneratorE4(args).cuda()
    # D1 = models.GeneratorD1(args).cuda()
    D1 = models.GeneratorD2(args).cuda()
    D2 = models.GeneratorD3(args).cuda()
    D3 = models.GeneratorD4(args).cuda()
    print(netE, netD)
    print(E1, E2, D1, D2, D3)

    optimE = optim.Adam(netE.parameters(), lr=5e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimD = optim.Adam(netD.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    Eoptim = [
        optim.Adam(E1.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4),
        optim.Adam(E2.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4),
        # optim.Adam(E3.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4),
        # optim.Adam(E4.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4),
    ]
    Doptim = [
        # optim.Adam(D1.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4),
        optim.Adam(D1.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4),
        optim.Adam(D2.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4),
        optim.Adam(D3.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4),
    ]
    Enets = [E1, E2]
    Dnets = [D1, D2, D3]

    best_test_loss = np.inf
    args.best_loss = best_test_loss

    mnist_train, mnist_test = datagen.load_mnist(args)
    x_dist = utils.create_d(args.ze)
    z_dist = utils.create_d(args.z)
    one = torch.FloatTensor([1]).cuda()
    mone = (one * -1).cuda()

    print("==> pretraining encoder")
    j = 0
    final = 100.
    e_batch_size = 1000
    if args.pretrain_e:
        for j in range(100):
            x = utils.sample_d(x_dist, e_batch_size)
            z = utils.sample_d(z_dist, e_batch_size)
            codes = netE(x)
            for i, code in enumerate(codes):
                code = code.view(e_batch_size, args.z)
                mean_loss, cov_loss = ops.pretrain_loss(code, z)
                loss = mean_loss + cov_loss
                loss.backward(retain_graph=True)
            optimE.step()
            netE.zero_grad()
            print('Pretrain Enc iter: {}, Mean Loss: {}, Cov Loss: {}'.format(
                j, mean_loss.item(), cov_loss.item()))
            final = loss.item()
            if loss.item() < 0.1:
                print('Finished Pretraining Encoder')
                break

    print('==> Begin Training')
    for _ in range(args.epochs):
        for batch_idx, (data, target) in enumerate(mnist_train):
            netE.zero_grad()
            for optim in Eoptim:
                optim.zero_grad()
            for optim in Doptim:
                optim.zero_grad()
            z = utils.sample_d(x_dist, args.batch_size)
            codes = netE(z)
            for code in codes:
                noise = utils.sample_z_like((args.batch_size, args.z))
                d_real = netD(noise)
                d_fake = netD(code)
                d_real_loss = torch.log((1 - d_real).mean())
                d_fake_loss = torch.log(d_fake.mean())
                d_real_loss.backward(torch.tensor(-1, dtype=torch.float).cuda(), retain_graph=True)
                d_fake_loss.backward(torch.tensor(-1, dtype=torch.float).cuda(), retain_graph=True)
                d_loss = d_real_loss + d_fake_loss
            optimD.step()
            netD.zero_grad()

            z = utils.sample_d(x_dist, args.batch_size)
            codes = netE(z)
            Eweights, Dweights = [], []
            i = 0
            for net in Enets:
                Eweights.append(net(codes[i]))
                i += 1
            for net in Dnets:
                Dweights.append(net(codes[i]))
                i += 1
            d_real = []
            for code in codes:
                d = netD(code)
                d_real.append(d)
            netD.zero_grad()
            d_loss = torch.stack(d_real).log().mean() * 10.
            for layers in zip(*(Eweights + Dweights)):
                loss, _ = train_clf(args, layers, data, target)
                scaled_loss = args.beta * loss
                scaled_loss.backward(retain_graph=True)
            d_loss.backward(torch.tensor(-1, dtype=torch.float).cuda(), retain_graph=True)
            optimE.step()
            for optim in Eoptim:
                optim.step()
            for optim in Doptim:
                optim.step()
            loss = loss.item()

            if batch_idx % 50 == 0:
                print('**************************************')
                print('AE MNIST Test, beta: {}'.format(args.beta))
                print('MSE Loss: {}'.format(loss))
                print('D loss: {}'.format(d_loss))
                print('best test loss: {}'.format(args.best_loss))
                print('**************************************')

            if batch_idx > 1 and batch_idx % 199 == 0:
                test_acc = 0.
                test_loss = 0.
                for i, (data, y) in enumerate(mnist_test):
                    z = utils.sample_d(x_dist, args.batch_size)
                    codes = netE(z)
                    Eweights, Dweights = [], []
                    i = 0
                    for net in Enets:
                        Eweights.append(net(codes[i]))
                        i += 1
                    for net in Dnets:
                        Dweights.append(net(codes[i]))
                        i += 1
                    for layers in zip(*(Eweights + Dweights)):
                        loss, out = train_clf(args, layers, data, y)
                        test_loss += loss.item()
                    if i == 10:
                        break
                test_loss /= 10 * len(y) * args.batch_size
                print('Test Loss: {}'.format(test_loss))
                if test_loss < best_test_loss:
                    print('==> new best stats, saving')
                    # utils.save_clf(args, z_test, test_acc)
                if test_loss < best_test_loss:
                    best_test_loss = test_loss
                    args.best_loss = test_loss
                archE = sampleE(args).cuda()
                archD = sampleD(args).cuda()
                rand = np.random.randint(args.batch_size)
                eweight = list(zip(*Eweights))[rand]
                dweight = list(zip(*Dweights))[rand]
                modelE = utils.weights_to_clf(eweight, archE, args.statE['layer_names'])
                modelD = utils.weights_to_clf(dweight, archD, args.statD['layer_names'])
                utils.generate_image(args, batch_idx, modelE, modelD, data.cuda())
def run_adv_hyper(args, hypernet):
    arch = get_network(args)
    models, fmodels = [], []
    # for i in range(10):
    #     model_base, fmodel_base = sample_fmodel(args, hypernet, arch)
    #     models.append(model_base)
    #     fmodels.append(fmodel_base)
    # fmodel_base = attacks.load_model(FusedNet(models))
    model_base, fmodel_base = sample_fmodel(args, hypernet, arch)
    criterion = Misclassification()
    fgs = foolbox.attacks.BIM(fmodel_base, criterion)
    _, test_loader = datagen.load_mnist(args)
    adv, y = [], []
    for n_models in [5, 10, 100, 1000]:
        print('ensemble of {}'.format(n_models))
        for eps in [0.01, 0.03, 0.08, 0.1, 0.3, 0.5, 1.0]:
            total_adv = 0
            acc, _accs = [], []
            _kl_real, _kl_adv = [], []
            _soft, _logs, _vars, _ents, _lsoft = [], [], [], [], []
            _soft_adv, _logs_adv, _vars_adv, _ents_adv, _lsoft_adv = [], [], [], [], []
            for idx, (data, target) in enumerate(test_loader):
                data, target = data.cuda(), target.cuda()
                adv_batch, target_batch, _ = sample_adv_batch(
                    data, target, fmodel_base, eps, fgs)
                if adv_batch is None:
                    continue
                if len(adv_batch) < 2:
                    continue
                # prediction of the base hypermodel on the adversarial batch
                output = model_base(adv_batch)
                pred = output.data.max(1, keepdim=True)[1]
                correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                n_adv = len(target_batch) - correct.item()
                total_adv += n_adv

                soft_out, pred_out, logits, lsoft_out = [], [], [], []
                soft_out_adv, pred_out_adv, logits_adv, lsoft_out_adv = [], [], [], []
                with torch.no_grad():
                    for n in range(n_models):
                        model, fmodel = sample_fmodel(args, hypernet, arch)
                        output = model(data)
                        soft_out.append(F.softmax(output, dim=1))
                        lsoft_out.append(F.log_softmax(output, dim=1))
                        # pred_out.append(output.data.max(1, keepdim=True)[1])
                        logits.append(output)
                        output = model(adv_batch)
                        soft_out_adv.append(F.softmax(output, dim=1))
                        lsoft_out_adv.append(F.log_softmax(output, dim=1))
                        # pred_out_adv.append(output.data.max(1, keepdim=True)[1])
                        logits_adv.append(output)
                softs = torch.stack(soft_out).float()
                lsoft = torch.stack(lsoft_out).float()
                # preds = torch.stack(pred_out).float()
                logs = torch.stack(logits).float()
                softs_adv = torch.stack(soft_out_adv).float()
                lsoft_adv = torch.stack(lsoft_out_adv).float()
                # preds_adv = torch.stack(pred_out_adv).float()
                logs_adv = torch.stack(logits_adv).float()

                # Measure variance of individual logits across models.
                # The HyperGAN ensemble has lower variance across the 10 class
                # predictions, but a single logit has high variance across models.
                units_softmax = softs.var(0).mean().item()  # var across models, across images
                ent = float(entropy(softs.mean(0).transpose(0, 1).detach()).mean())
                # units_logprob = logs.var(0).mean().item()
                units_softmax_adv = softs_adv.var(0).mean().item()  # var across models - images
                ent_adv = float(entropy(softs_adv.mean(0).transpose(0, 1).detach()).mean())
                log_var = lsoft.var(2).var(0)
                pop_var = softs.var(2).var(0)
                log_var_adv = lsoft_adv.var(0).var(1)
                pop_var_adv = softs_adv.var(0).var(1)

                # Core debug
                # print('softmax var: ', units_softmax)
                # print('logprob var: ', units_logprob)
                # print('ensemble var: ', ensemble_var)

                # build lists
                # softmax probs
                _soft.append(units_softmax)
                _soft_adv.append(units_softmax_adv)
                # softmax variance
                _vars.append(pop_var)
                _vars_adv.append(pop_var_adv)
                # entropy
                _ents.append(ent)
                _ents_adv.append(ent_adv)
                # _logs.append(units_logprob)
                # _logs_adv.append(units_logprob_adv)
                if idx > 10:
                    print('REAL: ent: {}'.format(torch.tensor(_ents).mean()))
                    print('ADV Eps: {}, ent: {}'.format(eps, torch.tensor(_ents_adv).mean()))
                    break
def train(args):
    torch.manual_seed(8734)
    netE = models.Encoder(args).cuda()
    W1 = models.GeneratorW1(args).cuda()
    W2 = models.GeneratorW2(args).cuda()
    W3 = models.GeneratorW3(args).cuda()
    W4 = models.GeneratorW4(args).cuda()
    W5 = models.GeneratorW5(args).cuda()
    netD = models.DiscriminatorZ(args).cuda()
    print(netE, W1, W2, W3, W4, W5, netD)

    optimE = optim.Adam(netE.parameters(), lr=5e-3, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW1 = optim.Adam(W1.parameters(), lr=5e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW2 = optim.Adam(W2.parameters(), lr=5e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW3 = optim.Adam(W3.parameters(), lr=5e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW4 = optim.Adam(W4.parameters(), lr=5e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW5 = optim.Adam(W5.parameters(), lr=5e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimD = optim.Adam(netD.parameters(), lr=5e-5, betas=(0.5, 0.9), weight_decay=1e-4)

    m_best_test_acc, m_best_test_loss = 0., np.inf
    c_best_test_acc, c_best_test_loss = 0., np.inf
    args.m_best_loss, args.m_best_acc = m_best_test_loss, m_best_test_acc
    args.c_best_loss, args.c_best_acc = c_best_test_loss, c_best_test_acc

    mnist_train, mnist_test = datagen.load_mnist(args)
    cifar_train, cifar_test = datagen.load_cifar(args)
    x_dist = utils.create_d(args.ze)
    z_dist = utils.create_d(args.z)
    one = torch.FloatTensor([1]).cuda()
    mone = (one * -1).cuda()

    print("==> pretraining encoder")
    j = 0
    final = 100.
    e_batch_size = 1000
    if args.pretrain_e:
        mask1 = torch.zeros(e_batch_size, args.ze).cuda()
        mask2 = torch.ones(e_batch_size, args.ze).cuda()
        for j in range(500):
            x = utils.sample_d(x_dist, e_batch_size)
            z = utils.sample_d(z_dist, e_batch_size)
            if j % 2 == 0:
                x = torch.cat((x, mask1), dim=0)
            if j % 2 == 1:
                x = torch.cat((x, mask2), dim=0)
            codes = netE(x)
            for i, code in enumerate(codes):
                code = code.view(e_batch_size, args.z)
                mean_loss, cov_loss = pretrain_loss(code, z)
                loss = mean_loss + cov_loss
                loss.backward(retain_graph=True)
            optimE.step()
            netE.zero_grad()
            print('Pretrain Enc iter: {}, Mean Loss: {}, Cov Loss: {}'.format(
                j, mean_loss.item(), cov_loss.item()))
            final = loss.item()
            if loss.item() < 0.1:
                print('Finished Pretraining Encoder')
                break

    print('==> Begin Training')
    for _ in range(args.epochs):
        for batch_idx, (mnist, cifar) in enumerate(zip(mnist_train, cifar_train)):
            if batch_idx % 2 == 0:
                data, target = mnist
                mask = torch.zeros(args.batch_size, args.ze).cuda()
            else:
                data, target = cifar
                mask = torch.ones(args.batch_size, args.ze).cuda()
            batch_zero_grad([netE, W1, W2, W3, W4, W5, netD])
            z = utils.sample_d(x_dist, args.batch_size)
            z = torch.cat((z, mask), dim=0)
            codes = netE(z)
            l1 = W1(codes[0])
            l2 = W2(codes[1])
            l3 = W3(codes[2])
            l4 = W4(codes[3])
            l5 = W5(codes[4])

            # Z Adversary
            for code in codes:
                noise = utils.sample_d(z_dist, args.batch_size)
                d_real = netD(noise)
                d_fake = netD(code)
                d_real_loss = -1 * torch.log((1 - d_real).mean())
                d_fake_loss = -1 * torch.log(d_fake.mean())
                d_real_loss.backward(retain_graph=True)
                d_fake_loss.backward(retain_graph=True)
                d_loss = d_real_loss + d_fake_loss
            optimD.step()

            # Generator (Mean test)
            netD.zero_grad()
            z = utils.sample_d(x_dist, args.batch_size)
            z = torch.cat((z, mask), dim=0)
            codes = netE(z)
            l1 = W1(codes[0])
            l2 = W2(codes[1])
            l3 = W3(codes[2])
            l4 = W4(codes[3])
            l5 = W5(codes[4])
            d_real = []
            for code in codes:
                d = netD(code)
                d_real.append(d)
            netD.zero_grad()
            d_loss = torch.stack(d_real).log().mean() * 10.
            for (g1, g2, g3, g4, g5) in zip(l1, l2, l3, l4, l5):
                correct, loss = train_clf(args, [g1, g2, g3, g4, g5], data, target)
                scaled_loss = args.beta * loss
                if loss != loss:  # NaN check
                    sys.exit(0)
                scaled_loss.backward(retain_graph=True)
            d_loss.backward(torch.tensor(-1, dtype=torch.float).cuda(), retain_graph=True)
            optimE.step()
            optimW1.step()
            optimW2.step()
            optimW3.step()
            optimW4.step()
            optimW5.step()
            loss = loss.item()

            # Update Statistics
            if batch_idx % 50 == 0 or batch_idx % 50 == 1:
                acc = correct
                print('**************************************')
                if batch_idx % 50 == 0:
                    print('MNIST Test: Enc, Dz, Lscale: {} test'.format(args.beta))
                if batch_idx % 50 == 1:
                    print('CIFAR Test: Enc, Dz, Lscale: {} test'.format(args.beta))
                print('Acc: {}, G Loss: {}, D Loss: {}'.format(acc, loss, d_loss))
                print('best test loss: {}, {}'.format(args.m_best_loss, args.c_best_loss))
                print('best test acc: {}, {}'.format(args.m_best_acc, args.c_best_acc))
                print('**************************************')

            if batch_idx > 1 and batch_idx % 100 == 0:
                m_test_acc = 0.
                m_test_loss = 0.
                for i, (data, y) in enumerate(mnist_test):
                    z = utils.sample_d(x_dist, args.batch_size)
                    z = torch.cat((z, torch.zeros(args.batch_size, args.ze).cuda()), dim=0)
                    w1_code, w2_code, w3_code, w4_code, w5_code = netE(z)
                    l1 = W1(w1_code)
                    l2 = W2(w2_code)
                    l3 = W3(w3_code)
                    l4 = W4(w4_code)
                    l5 = W5(w5_code)
                    for (g1, g2, g3, g4, g5) in zip(l1, l2, l3, l4, l5):
                        correct, loss = train_clf(args, [g1, g2, g3, g4, g5], data, y)
                        m_test_acc += correct.item()
                        m_test_loss += loss.item()
                m_test_loss /= len(mnist_test.dataset) * args.batch_size
                m_test_acc /= len(mnist_test.dataset) * args.batch_size
                print('MNIST Test Accuracy: {}, Test Loss: {}'.format(m_test_acc, m_test_loss))

                c_test_acc = 0.
                c_test_loss = 0
                for i, (data, y) in enumerate(cifar_test):
                    z = utils.sample_d(x_dist, args.batch_size)
                    z = torch.cat((z, torch.ones(args.batch_size, args.ze).cuda()), dim=0)
                    w1_code, w2_code, w3_code, w4_code, w5_code = netE(z)
                    l1 = W1(w1_code)
                    l2 = W2(w2_code)
                    l3 = W3(w3_code)
                    l4 = W4(w4_code)
                    l5 = W5(w5_code)
                    for (g1, g2, g3, g4, g5) in zip(l1, l2, l3, l4, l5):
                        correct, loss = train_clf(args, [g1, g2, g3, g4, g5], data, y)
                        c_test_acc += correct.item()
                        c_test_loss += loss.item()
                c_test_loss /= len(cifar_test.dataset) * args.batch_size
                c_test_acc /= len(cifar_test.dataset) * args.batch_size
                print('CIFAR Test Accuracy: {}, Test Loss: {}'.format(c_test_acc, c_test_loss))

                if m_test_loss < m_best_test_loss or m_test_acc > m_best_test_acc:
                    # utils.save_hypernet_cifar(args, [netE, W1, W2, W3, W4, W5, netD], test_acc)
                    print('==> new best stats, saving')
                    if m_test_loss < m_best_test_loss:
                        m_best_test_loss = m_test_loss
                        args.m_best_loss = m_test_loss
                    if m_test_acc > m_best_test_acc:
                        m_best_test_acc = m_test_acc
                        args.m_best_acc = m_test_acc
                if c_test_loss < c_best_test_loss or c_test_acc > c_best_test_acc:
                    # utils.save_hypernet_cifar(args, [netE, W1, W2, W3, W4, W5, netD], test_acc)
                    print('==> new best stats, saving')
                    if c_test_loss < c_best_test_loss:
                        c_best_test_loss = c_test_loss
                        args.c_best_loss = c_test_loss
                    if c_test_acc > c_best_test_acc:
                        c_best_test_acc = c_test_acc
                        args.c_best_acc = c_test_acc
def train(args, model):
    torch.manual_seed(1)
    netE = models.Encoderz(args).cuda()
    netG = models.Final_Small(args).cuda()
    netD = models.DiscriminatorQz(args).cuda()
    print(netE, netG, netD)

    optimE = optim.Adam(netE.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=5e-4)
    optimG = optim.Adam(netG.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=5e-4)
    optimD = optim.Adam(netD.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=5e-4)

    best_test_acc, best_clf_acc, best_test_loss = 0., 0., np.inf
    args.best_loss, args.best_acc = best_test_loss, best_test_acc
    args.best_clf_loss, args.best_clf_acc = np.inf, 0

    mnist_train, mnist_test = datagen.load_mnist(args)
    x_dist = utils.create_d(args.ze)
    z_dist = utils.create_d(args.z)
    one = torch.tensor(1.).cuda()
    mone = one * -1

    print("==> pretraining encoder")
    j = 0
    final = 100.
    e_batch_size = 1000
    if args.pretrain_e is True:
        for j in range(500):
            x = utils.sample_d(x_dist, e_batch_size)
            z = utils.sample_d(z_dist, e_batch_size)
            code = netE(x)
            qz = utils.sample_d(z_dist, e_batch_size)
            mean_loss, cov_loss = ops.pretrain_loss(code, qz)
            loss = mean_loss + cov_loss
            loss.backward()
            optimE.step()
            netE.zero_grad()
            print('Pretrain Enc iter: {}, Mean Loss: {}, Cov Loss: {}'.format(
                j, mean_loss.item(), cov_loss.item()))
            final = loss.item()
            if loss.item() < 0.1:
                print('Finished Pretraining Encoder')
                break

    print('==> Begin Training')
    for _ in range(args.epochs):
        for batch_idx, (data, target) in enumerate(mnist_train):
            data, target = data.cuda(), target.cuda()
            z = utils.sample_d(x_dist, args.batch_size)
            ze = utils.sample_d(z_dist, args.batch_size)
            qz = utils.sample_d(z_dist, args.batch_size)
            code = netE(z)
            log_pz = ops.log_density(ze, 2).view(-1, 1)
            d_loss, d_q = ops.calc_d_loss(args, netD, ze, [code], log_pz)
            d_loss.backward(retain_graph=True)
            optimD.step()
            optimE.step()
            optimD.zero_grad()
            optimE.zero_grad()

            gen_layers = netG(code)
            gp, grads, norms = ops.calc_gradient_penalty_layer(z, netG, netE)
            grads = grads.mean(0).mean(0).item()
            accs = torch.zeros(len(gen_layers)).cuda()
            losses = torch.zeros(len(gen_layers)).cuda()
            for i, layer in enumerate(gen_layers):
                output = model(data, layer)
                loss = F.cross_entropy(output, target)
                pred = output.data.max(1, keepdim=True)[1]
                correct = pred.eq(target.data.view_as(pred)).long().cpu().sum()
                losses[i] = loss
                accs[i] = correct
            G_loss, correct = losses.max(), accs.mean()

            one_qz = torch.ones((args.batch_size, 1), requires_grad=True).cuda()
            log_qz = ops.log_density(torch.ones(args.batch_size, 1), 2).view(-1, 1)
            Q_loss = F.binary_cross_entropy_with_logits(d_q + log_qz, one_qz)
            total_hyper_loss = Q_loss + G_loss  # + (gp.sum().cuda())  # mean().cuda()
            total_hyper_loss.backward()
            optimE.step()
            optimG.step()
            optimE.zero_grad()
            optimG.zero_grad()
            total_loss = total_hyper_loss.item()

            if batch_idx % 50 == 0:
                acc = correct
                print('**************************************')
                print('Iter: {}'.format(len(logger['acc'])))
                print('Acc: {}, MD Loss: {}, D loss: {}'.format(acc, total_hyper_loss, d_loss))
                print('penalties: ', gp.item())
                print('grads: ', grads)
                print('best test loss: {}'.format(args.best_loss))
                print('best test acc: {}'.format(args.best_acc))
                print('best clf acc: {}'.format(args.best_clf_acc))
                print('**************************************')

            if batch_idx % 100 == 0:
                test_acc = 0.
                test_loss = 0.
                with torch.no_grad():
                    for i, (data, target) in enumerate(mnist_test):
                        data, target = data.cuda(), target.cuda()
                        z = utils.sample_d(x_dist, args.batch_size)
                        code = netE(z)
                        gen_layers = netG(code)
                        for i, layer in enumerate(gen_layers):
                            output = model(data, layer)
                            test_loss += F.cross_entropy(output, target)
                            pred = output.data.max(1, keepdim=True)[1]
                            test_acc += pred.eq(target.data.view_as(pred)).float().sum()
                    test_loss /= len(mnist_test.dataset) * args.batch_size
                    test_acc /= len(mnist_test.dataset) * args.batch_size
                    clf_acc, clf_loss = test_clf(args, gen_layers)
                    stats.update_logger(gen_layers, logger)
                    stats.update_acc(logger, test_acc)
                    stats.update_grad(logger, grads, norms)
                    stats.save_logger(logger, args.exp)
                    stats.plot_logger(logger)

                    print('Test Accuracy: {}, Test Loss: {}'.format(test_acc, test_loss))
                    print('Clf Accuracy: {}, Clf Loss: {}'.format(clf_acc, clf_loss))
                    if test_loss < best_test_loss:
                        best_test_loss, args.best_loss = test_loss, test_loss
                    if test_acc > best_test_acc:
                        best_test_acc, args.best_acc = test_acc, test_acc
                    if clf_acc > best_clf_acc:
                        best_clf_acc, args.best_clf_acc = clf_acc, clf_acc
                        utils.save_hypernet_layer(args, [netE, netD, netG], clf_acc)
def run_adv_model(args, models):
    for model in models:
        model.eval()
    print('models loaded')
    # models = models[:5]
    model = FusedNet(models)
    print('made fusednet')
    fmodel = attacks.load_model(model)
    criterion = Misclassification()
    fgs = foolbox.attacks.FGSM(fmodel)
    print('created attack')
    _, test_loader = datagen.load_mnist(args)
    print('loaded dataset')
    for eps in [0.01, 0.03, 0.08, .1, .3, .5, 1.0]:
        total_adv = 0
        _soft, _logs, _vars, _ents, _lsoft = [], [], [], [], []
        _soft_adv, _logs_adv, _vars_adv, _ents_adv, _lsoft_adv = [], [], [], [], []
        _kl_real, _kl_adv = [], []
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.cuda(), target.cuda()
            adv_batch, target_batch, _ = sample_adv_batch(data, target, fmodel, eps, fgs)
            if adv_batch is None:
                continue
            # initial prediction of the fused ensemble on the adversarial batch
            output = model(adv_batch)
            pred = output.data.max(1, keepdim=True)[1]
            correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
            n_adv = len(target_batch) - correct.item()

            # sample from the individual models
            soft_out, pred_out, logits, lsoft_out = [], [], [], []
            soft_out_adv, pred_out_adv, logits_adv, lsoft_out_adv = [], [], [], []
            for i in range(len(models)):
                output = models[i](data)
                soft_out.append(F.softmax(output, dim=1))
                pred_out.append(output.data.max(1, keepdim=True)[1])
                lsoft_out.append(F.log_softmax(output, dim=1))
                logits.append(output)
                output = models[i](adv_batch)
                soft_out_adv.append(F.softmax(output, dim=1))
                lsoft_out_adv.append(F.log_softmax(output, dim=1))
            softs = torch.stack(soft_out).float()
            preds = torch.stack(pred_out).float()
            lsoft = torch.stack(lsoft_out).float()
            logs = torch.stack(logits).float()
            softs_adv = torch.stack(soft_out_adv).float()
            lsoft_adv = torch.stack(lsoft_out_adv).float()

            # Measure variance of individual logits across models.
            # The HyperGAN ensemble has lower variance across the 10 class
            # predictions, but a single logit has high variance across models.
            units_softmax = softs.var(0).mean().item()  # var across models, across images
            units_logprob = logs.var(0).mean().item()
            ensemble_var = softs.mean(0).var(1).mean().item()
            ent = float(entropy(softs.mean(0).transpose(0, 1).detach()).mean())
            ent_adv = float(entropy(softs_adv.mean(0).transpose(0, 1).detach()).mean())
            units_softmax_adv = softs_adv.var(0).mean().item()  # var across models - images
            ensemble_var_adv = softs_adv.mean(0).var(1).mean().item()

            # Core debug
            # print('softmax var: ', units_softmax)
            # print('logprob var: ', units_logprob)
            # print('ensemble var: ', ensemble_var)

            # build lists
            _soft.append(units_softmax)
            _soft_adv.append(units_softmax_adv)
            _logs.append(units_logprob)
            # entropy
            _ents.append(ent)
            _ents_adv.append(ent_adv)
            total_adv += n_adv
            if idx > 10:
                print('REAL: ent: {}'.format(torch.tensor(_ents).mean()))
                print('ADV Eps: {}, ent: {}'.format(eps, torch.tensor(_ents_adv).mean()))
                break
def train(args):
    torch.manual_seed(8734)
    netE = models.Encoder(args).cuda()
    W1 = models.GeneratorW1(args).cuda()
    W2 = models.GeneratorW2(args).cuda()
    W3 = models.GeneratorW3(args).cuda()
    netD = models.DiscriminatorZ(args).cuda()
    print(netE, W1, W2, W3)

    optimE = optim.Adam(netE.parameters(), lr=5e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW1 = optim.Adam(W1.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW2 = optim.Adam(W2.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW3 = optim.Adam(W3.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimD = optim.Adam(netD.parameters(), lr=1e-5, betas=(0.5, 0.9), weight_decay=1e-4)

    best_test_acc, best_test_loss = 0., np.inf
    args.best_loss, args.best_acc = best_test_loss, best_test_acc

    mnist_train, mnist_test = datagen.load_mnist(args)
    x_dist = utils.create_d(args.ze)
    z_dist = utils.create_d(args.z)
    one = torch.FloatTensor([1]).cuda()
    mone = (one * -1).cuda()

    print("==> pretraining encoder")
    j = 0
    final = 100.
    e_batch_size = 1000
    if args.pretrain_e:
        for j in range(2000):
            x = utils.sample_d(x_dist, e_batch_size)
            z = utils.sample_d(z_dist, e_batch_size)
            codes = netE(x)
            for i, code in enumerate(codes):
                code = code.view(e_batch_size, args.z)
                mean_loss, cov_loss = ops.pretrain_loss(code, z)
                loss = mean_loss + cov_loss
                loss.backward(retain_graph=True)
            optimE.step()
            netE.zero_grad()
            print('Pretrain Enc iter: {}, Mean Loss: {}, Cov Loss: {}'.format(
                j, mean_loss.item(), cov_loss.item()))
            final = loss.item()
            if loss.item() < 0.1:
                print('Finished Pretraining Encoder')
                break

    print('==> Begin Training')
    for _ in range(args.epochs):
        for batch_idx, (data, target) in enumerate(mnist_train):
            ops.batch_zero_grad([netE, W1, W2, W3, netD])
            z = utils.sample_d(x_dist, args.batch_size)
            codes = netE(z)
            l1 = W1(codes[0])
            l2 = W2(codes[1])
            l3 = W3(codes[2])
            if args.use_d:
                ops.free_params([netD])
                ops.frozen_params([netE, W1, W2, W3])
                for code in codes:
                    noise = utils.sample_d(z_dist, args.batch_size)
                    d_real = netD(noise)
                    d_fake = netD(code)
                    d_real_loss = -1 * torch.log((1 - d_real).mean())
                    d_fake_loss = -1 * torch.log(d_fake.mean())
                    d_real_loss.backward(retain_graph=True)
                    d_fake_loss.backward(retain_graph=True)
                    d_loss = d_real_loss + d_fake_loss
                optimD.step()
                ops.frozen_params([netD])
                ops.free_params([netE, W1, W2, W3])
            for (g1, g2, g3) in zip(l1, l2, l3):
                correct, loss = train_clf(args, [g1, g2, g3], data, target)
                scaled_loss = args.beta * loss
                scaled_loss.backward(retain_graph=True)
            optimE.step()
            optimW1.step()
            optimW2.step()
            optimW3.step()
            loss = loss.item()

            if batch_idx % 50 == 0:
                acc = (correct / 1)
                print('**************************************')
                print('{} MNIST Test, beta: {}'.format(args.model, args.beta))
                print('Acc: {}, Loss: {}'.format(acc, loss))
                print('best test loss: {}'.format(args.best_loss))
                print('best test acc: {}'.format(args.best_acc))
                print('**************************************')

            if batch_idx > 1 and batch_idx % 199 == 0:
                test_acc = 0.
                test_loss = 0.
                ensemble = 5
                for i, (data, y) in enumerate(mnist_test):
                    en1, en2, en3 = [], [], []
                    for i in range(ensemble):
                        z = utils.sample_d(x_dist, args.batch_size)
                        codes = netE(z)
                        rand = np.random.randint(32)
                        en1.append(W1(codes[0])[rand])
                        en2.append(W2(codes[1])[rand])
                        en3.append(W3(codes[2])[rand])
                    g1 = torch.stack(en1).mean(0)
                    g2 = torch.stack(en2).mean(0)
                    g3 = torch.stack(en3).mean(0)
                    correct, loss = train_clf(args, [g1, g2, g3], data, y)
                    test_acc += correct.item()
                    test_loss += loss.item()
                test_loss /= len(mnist_test.dataset)
                test_acc /= len(mnist_test.dataset)
                """
                for (g1, g2, g3) in zip(l1, l2, l3):
                    correct, loss = train_clf(args, [g1, g2, g3], data, y)
                    test_acc += correct.item()
                    test_loss += loss.item()
                test_loss /= len(mnist_test.dataset) * args.batch_size
                test_acc /= len(mnist_test.dataset) * args.batch_size
                """
                print('Test Accuracy: {}, Test Loss: {}'.format(test_acc, test_loss))
                if test_loss < best_test_loss or test_acc > best_test_acc:
                    print('==> new best stats, saving')
                    # utils.save_clf(args, z_test, test_acc)
                    if test_acc > .95:
                        utils.save_hypernet_mnist(args, [netE, W1, W2, W3], test_acc)
                    if test_loss < best_test_loss:
                        best_test_loss = test_loss
                        args.best_loss = test_loss
                    if test_acc > best_test_acc:
                        best_test_acc = test_acc
                        args.best_acc = test_acc