def perturb(X_nat, y, epsilon, Modell=None):
    """
    Given examples (X_nat, y), returns their adversarial counterparts
    with an attack length of epsilon (single-step FGSM).
    """
    X = np.copy(X_nat)
    X_var = to_var(torch.from_numpy(X), requires_grad=True)
    y_var = to_var(torch.LongTensor(y))

    # Fall back to a default model if none is supplied
    if Modell is None:
        Modell = VGG('VGG19')
        Modell.cuda()
    scores = Modell(X_var)
    criterion = nn.CrossEntropyLoss()
    loss = criterion(scores, y_var)
    loss.backward()

    # One gradient-sign step, then clip back to the valid pixel range
    grad_sign = X_var.grad.data.cpu().sign().numpy()
    X += epsilon * grad_sign
    X = np.clip(X, 0, 1)
    return X
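# These snippets all rely on a `to_var` helper that is never defined here.
# A minimal sketch of what it is assumed to do, based on how it is called
# (wrap a tensor as an old-style Variable, move it to the GPU when available,
# optionally track gradients):
import torch
from torch.autograd import Variable

def to_var(x, requires_grad=False):
    """Hypothetical helper: Variable on GPU if available (pre-0.4 PyTorch style)."""
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, requires_grad=requires_grad)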
def perturb(self, X_nat, y):
    """
    Given examples (X_nat, y), returns adversarial examples within
    epsilon of X_nat in l_infinity norm (iterative/PGD attack).
    """
    if self.rand:
        # random start inside the epsilon-ball
        X = X_nat + np.random.uniform(-self.epsilon, self.epsilon,
                                      X_nat.shape).astype('float32')
    else:
        X = np.copy(X_nat)

    for i in range(self.k):
        X_var = to_var(torch.from_numpy(X), requires_grad=True)
        y_var = to_var(torch.LongTensor(y))

        scores = self.model(X_var)
        loss = self.loss_fn(scores, y_var)
        loss.backward()
        grad = X_var.grad.data.cpu().numpy()

        X += self.a * np.sign(grad)

        # project back into the epsilon-ball around X_nat
        X = np.clip(X, X_nat - self.epsilon, X_nat + self.epsilon)
        X = np.clip(X, 0, 1)  # ensure valid pixel range

    return X
def perturb(self, x_nat, y):
    """
    Given one example (x_nat, y), returns an adversarial example
    within epsilon of x_nat in l_infinity norm.
    """
    if self.rand:
        x = x_nat + np.random.uniform(-self.epsilon, self.epsilon,
                                      x_nat.shape).astype('float32')
    else:
        x = np.copy(x_nat)

    for i in range(self.k):
        x_var = to_var(torch.from_numpy(x), requires_grad=True)
        y_var = to_var(torch.LongTensor([y]))

        scores = self.model(x_var)
        loss = self.loss_fn(scores, y_var)
        loss.backward()
        grad = x_var.grad.data.cpu().numpy()

        x += self.a * np.sign(grad)

        x = np.clip(x, x_nat - self.epsilon, x_nat + self.epsilon)
        x = np.clip(x, 0, 1)  # ensure valid pixel range

    return x
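# A hypothetical usage sketch. The two `perturb` methods above belong to a
# LinfPGDAttack class (instantiated elsewhere in this section as
# `LinfPGDAttack()`); the constructor arguments shown here (model, epsilon,
# step count k, step size a, random start) are assumptions inferred from the
# attributes the methods read, not a confirmed signature:
attack = LinfPGDAttack(model, epsilon=0.3, k=40, a=0.01, rand=True)
x_adv = attack.perturb(x_nat, y)  # numpy arrays with pixels in [0, 1]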
def main():
    # Use the GPU when available, otherwise fall back to the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Net().to(device)
    optimizer = torch.optim.Adam(model.parameters())
    criterion = torch.nn.CrossEntropyLoss()
    train_loader = Data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
    test_loader = Data.DataLoader(dataset=test_data, batch_size=batch_size)
    adversary = FGSMAttack(epsilon=0.2)

    for epoch in range(epochs):
        model.train()
        for t, (x, y) in enumerate(train_loader):
            x_var, y_var = to_var(x), to_var(y.long())
            loss = criterion(model(x_var), y_var)

            # adversarial training
            if epoch + 1 > delay:
                # use predicted label to prevent label leaking
                y_pred = pred_batch(x, model)
                x_adv = adv_train(x, y_pred, model, criterion, adversary)
                x_adv_var = to_var(x_adv)
                loss_adv = criterion(model(x_adv_var), y_var)
                loss = (loss + loss_adv) / 2

            if (t + 1) % 10 == 0:
                print('t = %d, loss = %.8f' % (t + 1, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # After each epoch, measure test accuracy; switch to eval mode
        # and disable gradient tracking
        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            sum_val_loss = 0
            for data in test_loader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                val_loss = criterion(outputs, labels)
                sum_val_loss += val_loss.item()
                # take the class with the highest score
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print('epoch=%d accuracy=%.02f%% val_loss=%.02f'
                  % (epoch + 1, (100. * correct / total), sum_val_loss))
            sum_val_loss = 0.0
    torch.save(model.state_dict(), './cifar-adv-pytorch/net.pth')
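# The training loops in this section call two helpers, `pred_batch` and
# `adv_train`, that are never defined. Minimal sketches of the assumed
# behavior (hard predictions for a batch; perturb a clean batch with the
# given adversary against the current model). Both names and bodies are
# assumptions inferred from the call sites, not the original implementations:
def pred_batch(x, model):
    """Hypothetical helper: hard label predictions for a batch, on CPU."""
    scores = model(to_var(x))
    _, y_pred = torch.max(scores.data, 1)
    return y_pred.cpu()

def adv_train(X, y, model, criterion, adversary):
    """Hypothetical helper: build adversarial examples for one clean batch."""
    adversary.model = model      # attack the current model parameters
    adversary.loss_fn = criterion
    X_adv = adversary.perturb(X.numpy(), y.numpy())
    return torch.from_numpy(X_adv)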
def perturb(self, X_nat, y):
    """
    Given examples (X_nat, y), returns adversarial examples within
    epsilon of X_nat in l_infinity norm.
    """
    X = np.copy(X_nat)

    for i in range(self.k):
        X_var = to_var(torch.from_numpy(X), requires_grad=True)
        y_var = to_var(torch.LongTensor(y))

        scores = self.model(X_var)
        loss = self.loss_fn(scores, y_var)
        loss.backward()
        grad = X_var.grad.data.cpu().numpy()

        if self.order == 'inf':
            X += self.epsilon * np.sign(grad)
        elif self.order == '2':
            # per-example L2 norm of the gradient (grad is NCHW)
            square = np.sum(grad ** 2, axis=(1, 2, 3))
            normalized_grad = (grad.T / np.sqrt(square)).T
            X += self.epsilon * normalized_grad
        else:
            raise NotImplementedError(
                'Only L-inf, L2 norms FGSM attacks are implemented')

    if not self.is_train:
        X = np.clip(X, 0, 1)  # ensure valid pixel range

    if self.storeadv:
        X = np.clip(X, 0, 1) * 255
        print('self.k', self.k, 'self.order', self.order)
        if self.k == 1 and self.order == '2':
            cv2.imwrite(os.path.join(advtrainfolder, 'PGD_Advtraining.png'),
                        np.squeeze(X[0, :]))
        else:
            cv2.imwrite(os.path.join(advtrainfolder, 'IFGSM_Advtraining.png'),
                        np.squeeze(X[0, :]))
        exit(0)

    return X
def perturb(self, x_nat, y):
    """
    Given one example (x_nat, y), returns its adversarial counterpart
    with an attack length of epsilon.
    """
    x = np.copy(x_nat)
    x_var = to_var(torch.from_numpy(x), requires_grad=True)
    y_var = to_var(torch.LongTensor([int(y)]))

    scores = self.model(x_var)
    loss = self.loss_fn(scores, y_var)
    loss.backward()

    grad_sign = x_var.grad.data.cpu().sign().numpy()
    x += self.epsilon * grad_sign
    x = np.clip(x, 0, 1)
    return x
def feval():
    # Sum the adversarial loss over the ensemble of adversarial samples,
    # add the clean-data loss, and average the two terms
    loss_adv = 0
    for k in range(len(x_adv)):
        x_adv_var = to_var(torch.from_numpy(x_adv[k].astype(np.float32)))
        loss_adv = loss_adv + criterion(model_list[i](x_adv_var), y_var)
    loss = criterion(model_list[i](x_var), y_var)
    loss_adv = loss_adv + loss
    loss_adv = loss_adv / 2.0
    optimizer.zero_grad()
    loss_adv.backward()
    return loss_adv  # TODO: return loss for extension
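# `feval` follows the torch.optim closure protocol: it is passed to
# optimizer.step(), which may re-evaluate the loss and gradients as often as
# it needs (the same pattern torch.optim.LBFGS uses). The assumed call site,
# matching how the training code below invokes it:
loss_adv = optimizer.step(feval)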
def perturb(self, X_nat, y, epsilons=None):
    """
    Given examples (X_nat, y), returns their adversarial counterparts
    with an attack length of epsilon.
    """
    # Providing epsilons in batch
    if epsilons is not None:
        self.epsilon = epsilons

    X = np.copy(X_nat)
    X_var = to_var(torch.from_numpy(X), requires_grad=True)
    y_var = to_var(torch.LongTensor(y))

    scores = self.model(X_var)
    loss = self.loss_fn(scores, y_var)
    loss.backward()

    grad_sign = X_var.grad.data.cpu().sign().numpy()
    X += self.epsilon * grad_sign
    X = np.clip(X, 0, 1)
    return X
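# The `perturb` method above is used as part of an FGSMAttack class
# (instantiated elsewhere in this section as FGSMAttack(epsilon=0.2) and
# FGSMAttack(epsilon=0.3)). A minimal skeleton of the assumed class, holding
# the attributes the method reads; the constructor signature is an assumption:
class FGSMAttack(object):
    def __init__(self, model=None, epsilon=None, loss_fn=None):
        self.model = model
        self.epsilon = epsilon
        self.loss_fn = loss_fn if loss_fn is not None else nn.CrossEntropyLoss()

    # perturb(self, X_nat, y, epsilons=None) as defined above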
def jacobian(model, x, nb_classes=10):
    """
    Returns a list of PyTorch gradients of each class score
    with respect to the input.
    """
    list_derivatives = []
    x_var = to_var(torch.from_numpy(x), requires_grad=True)

    # derivatives for each class; summing makes backward() valid for
    # batched inputs as well as a single example
    for class_ind in range(nb_classes):
        score = model(x_var)[:, class_ind]
        score.sum().backward()
        list_derivatives.append(x_var.grad.data.cpu().numpy())
        x_var.grad.data.zero_()

    return list_derivatives
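# The substitute-training code below calls `jacobian_augmentation`, which is
# not shown in these snippets. A minimal sketch of Jacobian-based dataset
# augmentation in the spirit of arXiv:1602.02697, using the `jacobian`
# function above; the step size `lmbda` and the exact looping are assumptions:
def jacobian_augmentation(model, X_sub, y_sub, lmbda=0.1):
    """Hypothetical: double the substitute set by stepping each point along
    the sign of the Jacobian row for its current label."""
    X_aug = np.zeros_like(X_sub)
    for ind, x in enumerate(X_sub):
        grads = jacobian(model, x[None, :], nb_classes=10)
        grad = grads[y_sub[ind]]          # row for the oracle-assigned label
        X_aug[ind] = x + lmbda * np.sign(grad[0])
    return np.concatenate([X_sub, X_aug], axis=0)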
def feval():
    loss_adv = 0
    # add adversarial loss
    for k in range(len(x_adv_temp)):
        x_adv_var = to_var(torch.from_numpy(x_adv_temp[k].astype(np.float32)))
        loss_adv = loss_adv + criterion(self.model_list[i](x_adv_var), y_var)
    # add clean loss
    loss_adv = loss_adv + criterion(self.model_list[i](x_var), y_var)
    loss_adv = loss_adv / 2.0
    optimizer.zero_grad()
    loss_adv.backward()
    return loss_adv  # TODO: return loss for extension
#adversary = FGSMAttack(epsilon=0.3)
adversary = LinfPGDAttack()

# Train the model
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(net.parameters(), lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])

for epoch in range(param['num_epochs']):
    print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs']))
    for t, (x, y) in enumerate(loader_train):
        x_var, y_var = to_var(x), to_var(y.long())
        loss = criterion(net(x_var), y_var)

        # adversarial training
        if epoch + 1 > param['delay']:
            # use predicted label to prevent label leaking
            y_pred = pred_batch(x, net)
            x_adv = adv_train(x, y_pred, net, criterion, adversary)
            x_adv_var = to_var(x_adv)
            loss_adv = criterion(net(x_adv_var), y_var)
            loss = (loss + loss_adv) / 2

        if (t + 1) % 100 == 0:
            print('t = %d, loss = %.8f' % (t + 1, loss.item()))

        optimizer.zero_grad()
        # backward pass and parameter update, completing the training step
        loss.backward()
        optimizer.step()
# early stopping parameters
patience = param['patience']
best_loss = 1e4

# Change optimizer for finetuning
optimizer = optim.Adam(net.parameters())

for e in range(param['nepochs']):
    print('Starting epoch %d' % (e + 1))
    for t, (x_input, y_label) in enumerate(train_loader):
        x_var, y_var = to_var(x_input), to_var(y_label.long())
        if args.advtraining == 'BayesWRM' or args.advtraining == 'Bayes':
            if args.advtraining == 'BayesWRM':
                x_adv = x_input.cpu()
                x_adv = adv_train(X=x_adv, y=y_label.cpu().long(),
                                  model=model_list, criterion=criterion,
                                  adversary=adversary)
            for i in range(len(model_list)):
                optimizer = SGAdaHMC(model_list[i].parameters(),
                                     config=dict(lr=args.initlr, T=args.T_out))
                if advtraining == 'BayesWRM':
def perturb(self, X_nat, y):
    """
    Given examples (X_nat, y), returns adversarial examples within
    epsilon of X_nat in l_infinity norm (BayesWRM inner maximization).
    """
    import time
    start_time = time.time()

    if self.rand:
        X = X_nat + np.random.uniform(-self.epsilon, self.epsilon,
                                      X_nat.shape).astype('float32')
    else:
        X = np.copy(X_nat)

    Sz = self.Sz
    Stheta = self.Stheta
    MC = 15

    # Replicate the batch Sz times (a list of numpy arrays), one copy per
    # posterior sample of the adversarial input, then stack them
    z_list = [copy.deepcopy(X) for i in range(Sz)]
    y_list = [copy.deepcopy(y) for i in range(Sz)]
    y = np.concatenate(y_list)
    z_stack = np.concatenate(z_list)

    y_var = to_var(torch.LongTensor(y))
    loss_fn = nn.CrossEntropyLoss()
    znn = ZNN(zinput=z_stack, y_var=y_var)

    if self.optim == 'SGHMC':
        optimizer = SGHMC(znn.parameters(),
                          config=dict(lr=self.a, T=self.T, L=self.k))
    elif self.optim == 'SGAdaHMC':
        optimizer = SGAdaHMC(znn.parameters(),
                             config=dict(lr=self.a, T=self.T, L=self.k))
    else:
        raise NotImplementedError('Inner optimizer not implemented')

    # SGHMC/SGAdaHMC follow the torch.optim closure interface:
    # step() calls feval to re-evaluate the loss
    def helper():
        def feval():
            total_loss = znn(model_list=self.model_list,
                             gamma=self.gamma, Stheta=Stheta)
            optimizer.zero_grad()
            total_loss.backward()
            return total_loss  # TODO: return loss for extension
        return feval

    total_loss = optimizer.step(helper())
    z_stack = znn.Z_var.data.cpu().numpy()

    # Split the stacked samples back into Sz separate batches
    z_list = []
    for i in range(Sz):
        batch_size = int(z_stack.shape[0] / Sz)
        z_adv = z_stack[i * batch_size:(i + 1) * batch_size, :, :, :]
        z_list.append(z_adv)

    if self.storeadv:
        for ind, X in enumerate(z_list):
            X = np.clip(X, 0, 1) * 255
            cv2.imwrite(os.path.join('advtrain_sample',
                                     'BayesWRM_Advtraining_' + str(ind) + '.png'),
                        np.squeeze(X[0, :]))
        exit(0)

    return z_list
def train(self):
    self.epochs = self.params.epochs
    criterion = nn.CrossEntropyLoss()
    start_epoch = 0

    if advtraining == 'BayesWRM' or advtraining == 'Bayes':
        for net in self.model_list:
            net.train()
    else:
        self.model.train()

    print("Starting training")
    self.print_info()

    init_lr = args.initlr

    for epoch in range(start_epoch, self.params.epochs):
        print('start epoch', str(epoch))
        print('advtraining method', advtraining)
        for i, (images, labels) in enumerate(self.train_loader):
            X, y = images.cuda(), labels.cuda()
            x_var, y_var = to_var(X), to_var(y)

            if adversary is not None:
                x_adv = X.cpu()
                if advtraining == 'BayesWRM':
                    x_adv = adv_train(X=x_adv, y=labels.cpu().long(),
                                      model=self.model_list,
                                      criterion=criterion, adversary=adversary)
                    # j indexes the posterior model samples so that it does
                    # not shadow the batch index i
                    for j in range(Stheta):
                        # copy the list so deleting one entry does not
                        # mutate x_adv itself
                        x_adv_temp = list(x_adv)
                        if args.multi:
                            del x_adv_temp[j]

                        # simple step-wise learning-rate schedule
                        if epoch < 2:
                            lr = init_lr
                        elif epoch < 5:
                            lr = 0.1 * init_lr
                        elif epoch < 10:
                            lr = 0.1 * init_lr
                        else:
                            lr = 0.05 * init_lr

                        if args.outoptimizer == 'SGHMC':
                            optimizer = SGHMC(
                                filter(lambda x: x.requires_grad,
                                       self.model_list[j].parameters()),
                                config=dict(lr=lr, T=args.T_out))
                        elif args.outoptimizer == 'SGAdaHMC':
                            optimizer = SGAdaHMC(
                                filter(lambda x: x.requires_grad,
                                       self.model_list[j].parameters()),
                                config=dict(lr=0.01, T=args.T_out))
                        else:
                            raise NotImplementedError('Inner optimizer not implemented')

                        def helper():
                            def feval():
                                loss_adv = 0
                                # add adversarial loss
                                for k in range(len(x_adv_temp)):
                                    x_adv_var = to_var(torch.from_numpy(
                                        x_adv_temp[k].astype(np.float32)))
                                    loss_adv = loss_adv + criterion(
                                        self.model_list[j](x_adv_var), y_var)
                                # add clean loss
                                loss_adv = loss_adv + criterion(
                                    self.model_list[j](x_var), y_var)
                                loss_adv = loss_adv / 2.0
                                optimizer.zero_grad()
                                loss_adv.backward()
                                return loss_adv  # TODO: return loss for extension
                            return feval

                        loss_adv = optimizer.step(helper())
                else:
                    x_adv = adv_train(x_adv, y.cpu().long(), self.model,
                                      criterion, adversary)
                    x_adv_var = to_var(x_adv)
                    loss_adv = criterion(self.model(x_adv_var), y_var)
                    loss = (loss_adv + criterion(self.model(x_var), y_var)) / 2.0
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
            else:
                if advtraining == 'Bayes':
                    for j in range(Stheta):
                        if epoch < 2:
                            lr = init_lr
                        elif epoch < 5:
                            lr = 0.1 * init_lr
                        elif epoch < 10:
                            lr = 0.1 * init_lr
                        else:
                            lr = 0.05 * init_lr

                        if args.outoptimizer == 'SGHMC':
                            optimizer = SGHMC(
                                filter(lambda x: x.requires_grad,
                                       self.model_list[j].parameters()),
                                config=dict(lr=lr, T=args.T_out))
                        elif args.outoptimizer == 'SGAdaHMC':
                            optimizer = SGAdaHMC(
                                filter(lambda x: x.requires_grad,
                                       self.model_list[j].parameters()),
                                config=dict(lr=0.01, T=args.T_out))
                        else:
                            raise NotImplementedError('Outer optimizer not implemented')

                        def helper():
                            def feval():
                                loss_adv = criterion(self.model_list[j](x_var), y_var)
                                optimizer.zero_grad()
                                loss_adv.backward()
                                return loss_adv  # TODO: return loss for extension
                            return feval

                        loss = optimizer.step(helper())
                else:
                    loss = criterion(self.model(x_var), y_var)
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

            if self.params.extra_debug and (i + 1) % (self.params.batch_size * 4) == 0:
                print(('Epoch: [{0}/{1}], Step: [{2}/{3}], Loss: {4}')
                      .format(epoch + 1, self.params.epochs, i + 1,
                              len(self.train_loader), loss.item()))

        print('entering validation; the advtraining method is', advtraining)
        if advtraining == 'BayesWRM' or advtraining == 'Bayes':
            train_acc, train_loss = self.validate_model(self.train_loader, self.model_list[0])
            val_acc, val_loss = self.validate_model(self.val_loader, self.model_list[0])
        else:
            train_acc, train_loss = self.validate_model(self.train_loader, self.model)
            val_acc, val_loss = self.validate_model(self.val_loader, self.model)

        self.histories['train_loss'] = np.append(self.histories['train_loss'], [train_loss])
        self.histories['val_loss'] = np.append(self.histories['val_loss'], [val_loss])
        self.histories['val_acc'] = np.append(self.histories['val_acc'], [val_acc])
        self.histories['train_acc'] = np.append(self.histories['train_acc'], [train_acc])
        print('train_acc', str(train_acc), 'val_acc', str(val_acc))
        print('advtraining method', advtraining)
            total += y_train.size(0)
            correct += predicted.eq(y_train.data).cuda().sum()
            torch.cuda.empty_cache()
        else:
            loss_cl = loss2(c_pre, y_train)
            loss_sum = torch.mul(loss, 1 / 1) + loss_cl
            if epoch + 1 > param['delay']:
                # use predicted label to prevent label leaking
                y_pred = pred_batch(torch.cat((x_train, x_train, x_train), 1), n)
                x_adv = adv_train(torch.cat((x_train, x_train, x_train), 1),
                                  y_pred, n, loss2, adversary)
                n.zero_grad()
                optimizer.zero_grad()
                x_adv_var = to_var(x_adv)
                y_pre, c_pre = n(x_adv_var)
                # classification loss plus reconstruction loss on the
                # perturbed input
                loss_adv = loss2(c_pre, y_train) + loss1(
                    torch.mul(y_pre, 1.0),
                    torch.mul(torch.cat((x_train, x_train, x_train), 1), 1.0)) / 1
                loss_sum = (loss_sum + loss_adv) / 2
            loss_sum.backward(retain_graph=True)
            torch.nn.utils.clip_grad_norm_(n.parameters(), 10.0)
            optimizer.step()
            epoch_loss += loss_sum.data.item()
            _, predicted = torch.max(c_pre.data, 1)
            total += y_train.size(0)
            correct += predicted.eq(y_train.data).cuda().sum()
            train_globa_step += 1
def perturb(self, X_nat, y):
    # refer to the TF cleverhans implementation
    X = np.copy(X_nat)
    batch_size = X.shape[0]
    index = y.view(-1, 1)
    tlab = torch_extras.one_hot((X.shape[0], self.classes), index)
    tlab = to_var(torch.from_numpy(tlab.numpy().astype(np.int64)),
                  requires_grad=False)
    lower_bound = np.zeros(batch_size)
    X_var0 = to_var(torch.from_numpy(X), requires_grad=False)
    X_adv = np.copy(X_nat)

    if isinstance(self.model, list):
        # ensemble: average the gradient over all models
        for t in range(self.steps):
            grad_list = []
            for model in self.model:
                X_var = to_var(torch.from_numpy(X), requires_grad=True)
                y_var = to_var(torch.LongTensor(y))
                scores = model(X_var)

                tlab = tlab.type(torch.cuda.FloatTensor)
                real = torch.sum(torch.mul(scores, tlab))
                other = torch.sum(torch.mul(scores, 1 - tlab))
                loss1 = torch.clamp(real - other, min=0.0)
                loss2 = (torch.sum((X_var - X_var0) ** 2) + 1e-9) ** 0.5
                loss = loss1 + loss2
                loss.backward()
                grad = X_var.grad.data.cpu().numpy()
                grad_list.append(grad)
            grad = np.mean(grad_list, axis=0)
            # go in the opposite direction: we want to minimize the score of
            # the true label and maximize the scores of the wrong labels
            X_adv = X_adv - 1. / np.sqrt(t + 2) * grad
    else:
        for t in range(self.steps):
            X_var = to_var(torch.from_numpy(X), requires_grad=True)
            y_var = to_var(torch.LongTensor(y))
            scores = self.model(X_var)

            tlab = tlab.type(torch.cuda.FloatTensor)
            real = torch.sum(torch.mul(scores, tlab))
            other = torch.sum(torch.mul(scores, 1 - tlab))
            loss1 = torch.clamp(real - other, min=0.0)
            loss2 = (torch.sum((X_var - X_var0) ** 2) + 1e-9) ** 0.5
            loss = loss1 + loss2
            loss.backward()
            grad = X_var.grad.data.cpu().numpy()
            X_adv = X_adv - 1. / np.sqrt(t + 2) * grad

    if self.storeadv:
        X = np.clip(X, 0, 1) * 255
        cv2.imwrite(os.path.join(advtrainfolder,
                                 self.advtraining + 'CWattacked.png'),
                    np.squeeze(X[0, :]))
        exit(0)

    X_adv = np.clip(X_adv, 0, 1)
    return X_adv
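# `torch_extras.one_hot` is a third-party helper. A minimal stand-in with the
# assumed behavior (a size tuple plus an index column tensor producing a
# one-hot LongTensor), in case torch_extras is unavailable:
def one_hot(size, index):
    """Hypothetical replacement: scatter 1s at `index` into a zero tensor."""
    return torch.zeros(*size).long().scatter_(1, index, 1)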
def perturb(self, X_nat, y, epsilons=None):
    """
    Given examples (X_nat, y), returns their adversarial counterparts
    with an attack length of epsilon.
    """
    # Providing epsilons in batch
    if epsilons is not None:
        self.epsilon = epsilons

    X = np.copy(X_nat)

    if isinstance(self.model, list):
        # ensemble: average the gradients of all models, then take the sign
        grad_list = []
        for model in self.model:
            X_var = to_var(torch.from_numpy(X), requires_grad=True)
            y_var = to_var(torch.LongTensor(y))
            scores = model(X_var)
            loss = self.loss_fn(scores, y_var)
            loss.backward()
            grad_list.append(X_var.grad.data.cpu().numpy())
        grad_sign = np.sign(np.mean(grad_list, axis=0))
        X += self.epsilon * grad_sign
        X = np.clip(X, 0, 1)
    else:
        X_var = to_var(torch.from_numpy(X), requires_grad=True)
        y_var = to_var(torch.LongTensor(y))
        scores = self.model(X_var)
        loss = self.loss_fn(scores, y_var)
        loss.backward()

        if self.pixelattack == 0:
            if self.order == 'inf':
                grad_sign = X_var.grad.data.cpu().sign().numpy()
                normalized_grad = grad_sign
            elif self.order == '2':
                grad = X_var.grad.data.cpu().numpy()
                # normalize by the global L2 norm of the gradient
                square = np.sum(grad ** 2)
                normalized_grad = grad / np.sqrt(square)
            else:
                raise NotImplementedError(
                    'Only L-inf, L2 norms FGSM attacks are implemented')
        else:
            # keep only the `pixelattack` largest gradient entries
            grad = X_var.grad.data.cpu().numpy()
            topk = grad.flatten()
            topk.sort()
            topk = topk[-self.pixelattack]
            grad[grad < topk] = 0.0
            grad[grad >= topk] = 1.0
            normalized_grad = grad

        X += self.epsilon * normalized_grad
        if not self.is_train:
            X = np.clip(X, 0, 1)

    if self.storeadv:
        X_display = np.clip(X, 0, 1) * 255
        cv2.imwrite(os.path.join(advtrainfolder,
                                 self.advtraining + '_epsilon_' + str(self.epsilon)
                                 + '_fixedindex_' + str(self.storeindex)
                                 + '_FGSMAttack.png'),
                    np.squeeze(X_display[0, :]))

    return X
def MNIST_bbox_sub(param, loader_hold_out, loader_test):
    """
    Train a substitute model using Jacobian data augmentation
    (arXiv:1602.02697).
    """
    # Setup the substitute
    net = SubstituteModel()
    if torch.cuda.is_available():
        print('CUDA enabled for the substitute.')
        net.cuda()
    net.train()

    # Setup the oracle
    oracle = LeNet5()
    if torch.cuda.is_available():
        print('CUDA enabled for the oracle.')
        oracle.cuda()
    oracle.load_state_dict(torch.load(param['oracle_name'] + '.pkl'))
    oracle.eval()

    # Setup training
    criterion = nn.CrossEntropyLoss()

    # Careful optimization is crucial to train a well-representative
    # substitute. In TensorFlow, Adam has some problems
    # (https://github.com/tensorflow/cleverhans/issues/183), but it works
    # fine here in PyTorch (you may try other optimization methods).
    optimizer = torch.optim.Adam(net.parameters(), lr=param['learning_rate'])

    # Data held out for initial training
    data_iter = iter(loader_hold_out)
    X_sub, y_sub = next(data_iter)
    X_sub, y_sub = X_sub.numpy(), y_sub.numpy()

    # Train the substitute and augment the dataset alternately
    for rho in range(param['data_aug']):
        print("Substitute training epoch #" + str(rho))
        print("Training data: " + str(len(X_sub)))

        rng = np.random.RandomState()

        # model training
        for epoch in range(param['nb_epochs']):
            print('Starting epoch %d / %d' % (epoch + 1, param['nb_epochs']))

            # Compute number of batches
            nb_batches = int(np.ceil(float(len(X_sub)) / param['test_batch_size']))
            assert nb_batches * param['test_batch_size'] >= len(X_sub)

            # Indices to shuffle training set
            index_shuf = list(range(len(X_sub)))
            rng.shuffle(index_shuf)

            for batch in range(nb_batches):
                # Compute batch start and end indices
                start, end = batch_indices(batch, len(X_sub), param['test_batch_size'])
                x = X_sub[index_shuf[start:end]]
                y = y_sub[index_shuf[start:end]]

                scores = net(to_var(torch.from_numpy(x)))
                loss = criterion(scores, to_var(torch.from_numpy(y).long()))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            print('loss = %.8f' % loss.item())

        test(net, loader_test, blackbox=True, hold_out_size=param['hold_out_size'])

        # If we are not at the last substitute training iteration, augment dataset
        if rho < param['data_aug'] - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(net, X_sub, y_sub)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            scores = oracle(to_var(torch.from_numpy(X_sub)))
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            y_sub = np.argmax(scores.data.cpu().numpy(), axis=1)

    torch.save(net.state_dict(), param['oracle_name'] + '_sub.pkl')
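# `batch_indices` above comes from the cleverhans utilities and is not shown
# in these snippets. A minimal sketch of the assumed behavior (shift the last
# window back so every batch stays within the dataset, which the assert above
# relies on):
def batch_indices(batch_nb, data_length, batch_size):
    """Hypothetical helper: start/end indices of batch `batch_nb`."""
    start = int(batch_nb * batch_size)
    end = int((batch_nb + 1) * batch_size)
    # shift the window back if it runs past the end of the data
    if end > data_length:
        shift = end - data_length
        start -= shift
        end -= shift
    return start, end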