def main(self):
    """
    Main method.
    """

    database = utils.read_hdf5(self.args.database_file)
    log('[Data] read %s' % self.args.database_file)

    N_font = database.shape[0]
    N_class = database.shape[1]
    assert database.shape[2] == database.shape[3]

    database = database.reshape((database.shape[0] * database.shape[1], database.shape[2], database.shape[3]))
    database = torch.from_numpy(database).float()
    if self.args.use_gpu:
        database = database.cuda()
    database = torch.autograd.Variable(database)

    codes = utils.read_hdf5(self.args.codes_file)
    codes = codes[:, 0]
    codes = common.numpy.one_hot(codes, N_font * N_class)
    log('[Data] read %s' % self.args.codes_file)

    theta = utils.read_hdf5(self.args.theta_file)
    N = theta.shape[0]
    N_theta = theta.shape[1]
    log('[Data] read %s' % self.args.theta_file)

    model = models.OneHotDecoder(database, N_theta)

    images = []
    num_batches = int(math.ceil(float(N) / self.args.batch_size))
    for b in range(num_batches):
        batch_theta = torch.from_numpy(theta[b * self.args.batch_size: min((b + 1) * self.args.batch_size, N)])
        batch_codes = torch.from_numpy(codes[b * self.args.batch_size: min((b + 1) * self.args.batch_size, N)])
        batch_codes, batch_theta = batch_codes.float(), batch_theta.float()
        if self.args.use_gpu:
            batch_codes, batch_theta = batch_codes.cuda(), batch_theta.cuda()
        batch_codes, batch_theta = torch.autograd.Variable(batch_codes), torch.autograd.Variable(batch_theta)

        output = model(batch_codes, batch_theta)
        images.append(output.data.cpu().numpy().squeeze())

        if b % 1000 == 0:
            log('[Data] processed %d/%d batches' % (b + 1, num_batches))

    images = numpy.concatenate(images, axis=0)
    if len(images.shape) > 3:
        images = numpy.transpose(images, (0, 2, 3, 1))

    utils.write_hdf5(self.args.images_file, images)
    log('[Data] wrote %s' % self.args.images_file)
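# The `utils.read_hdf5`/`utils.write_hdf5` helpers used above are not shown in this
# excerpt. A minimal sketch of the behavior they would need, assuming h5py and a
# single dataset per file stored under the key 'tensor' (both the key and the exact
# signatures are assumptions, not the actual implementation):
import h5py

def read_hdf5(file_path, key='tensor'):
    """Read one dataset from an HDF5 file into a numpy array."""
    with h5py.File(file_path, 'r') as f:
        return f[key][()]

def write_hdf5(file_path, tensor, key='tensor'):
    """Write a numpy array as one dataset of an HDF5 file."""
    with h5py.File(file_path, 'w') as f:
        f.create_dataset(key, data=tensor)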
def test(self):
    """
    Test classifier to identify valid samples to attack.
    """

    num_batches = int(math.ceil(self.perturbations.shape[0] / self.args.batch_size))
    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, self.perturbations.shape[0])

        batch_fonts = self.test_fonts[b_start: b_end]
        batch_classes = self.test_classes[b_start: b_end]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class)
        ), axis=1).astype(numpy.float32)

        batch_inputs = common.torch.as_variable(self.perturbations[b_start: b_end], self.args.use_gpu)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

        # This restricts the optimization to theta, keeping the font/class code fixed.
        self.model.set_code(batch_code)
        output_images = self.model(batch_inputs)

        output_images = numpy.squeeze(numpy.transpose(output_images.cpu().detach().numpy(), (0, 2, 3, 1)))
        self.perturbation_images = common.numpy.concatenate(self.perturbation_images, output_images)

        if b % 100 == 0:
            log('[Testing] computing perturbation images %d' % b)

    utils.makedir(os.path.dirname(self.args.perturbation_images_file))
    if len(self.perturbation_images.shape) > 3:
        self.perturbation_images = self.perturbation_images.reshape(self.N_samples, self.N_attempts, self.perturbation_images.shape[1], self.perturbation_images.shape[2], self.perturbation_images.shape[3])
    else:
        self.perturbation_images = self.perturbation_images.reshape(self.N_samples, self.N_attempts, self.perturbation_images.shape[1], self.perturbation_images.shape[2])

    self.perturbation_images = numpy.swapaxes(self.perturbation_images, 0, 1)
    utils.write_hdf5(self.args.perturbation_images_file, self.perturbation_images)
    log('[Testing] wrote %s' % self.args.perturbation_images_file)
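# `common.numpy.concatenate` is used as an accumulator throughout: the running
# array starts out as None and per-batch results are appended to it. A minimal
# sketch under that assumption (not the actual implementation):
import numpy

def concatenate(array, batch, axis=0):
    """Append `batch` to `array`, treating a None `array` as empty."""
    if array is None:
        return batch
    return numpy.concatenate((array, batch), axis=axis)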
def plot_manifolds(self):
    """
    Plot manifolds.
    """

    #
    # Plot all classes and adversarial examples in image space for individual classes as well as all classes.
    #

    fit = self.test_codes.shape[0] // 25
    test_images = self.test_images.reshape((self.test_images.shape[0], -1))

    manifold_visualization = plot.ManifoldVisualization('tsne', pre_pca=40)
    manifold_visualization.fit(test_images[:fit])
    log('[Testing] computed t-SNE on test images')

    for n in range(self.N_class):
        labels = ['Class %d' % (nn + 1) for nn in range(self.N_class)] + ['Adversarial Examples Class %d' % (n + 1)]
        data = numpy.concatenate((
            test_images[:fit],
            self.perturbation_images[self.perturbation_codes == n]
        ))
        classes = numpy.concatenate((
            self.test_codes[:fit],
            numpy.ones((self.perturbation_images[self.perturbation_codes == n].shape[0])) * 10,
        ))
        plot_file = os.path.join(self.args.plot_directory, 'perturbations_%d' % (n + 1))
        manifold_visualization.visualize(plot_file, data, classes, labels,
                                         title='Adversarial Examples Class %d\n(The adversarial examples are projected into the embedding using learned SVRs)' % (n + 1))
        log('[Testing] wrote %s' % plot_file)

    labels = ['Class %d' % (n + 1) for n in range(self.N_class)] + ['Adversarial Examples Class %d' % (n + 1) for n in range(self.N_class)]
    data = numpy.concatenate((
        test_images[:fit],
        self.perturbation_images
    ))
    classes = numpy.concatenate((
        self.test_codes[:fit],
        self.perturbation_codes + 10,
    ))
    plot_file = os.path.join(self.args.plot_directory, 'perturbations')
    manifold_visualization.visualize(plot_file, data, classes, labels,
                                     title='Adversarial Examples\n(The adversarial examples are projected into the embedding using learned SVRs)')
    log('[Testing] wrote %s' % plot_file)
def compute_local_pca(self):
    """
    Compute local PCA around each attacked sample.
    """

    success = numpy.logical_and(self.success >= 0, self.accuracy)
    log('[Detection] %d valid attacked samples' % numpy.sum(success))

    nearest_neighbor_images = self.nearest_neighbor_images.reshape(self.nearest_neighbor_images.shape[0], -1)
    nearest_neighbor_images = nearest_neighbor_images[:self.args.n_fit]
    perturbations = self.perturbations.reshape(self.perturbations.shape[0], -1)
    test_images = self.test_images.reshape(self.test_images.shape[0], -1)
    pure_perturbations = perturbations - test_images

    nearest_neighbors_indices = self.compute_nearest_neighbors(perturbations)

    self.distances['true'] = numpy.zeros((success.shape[0]))
    self.distances['test'] = numpy.zeros((success.shape[0]))
    self.distances['perturbation'] = numpy.zeros((success.shape[0]))

    self.angles['true'] = numpy.zeros((success.shape[0]))
    self.angles['test'] = numpy.zeros((success.shape[0]))
    self.angles['perturbation'] = numpy.zeros((success.shape[0]))

    for n in range(pure_perturbations.shape[0]):
        if success[n]:
            nearest_neighbors = nearest_neighbor_images[nearest_neighbors_indices[n, :]]
            nearest_neighbors = numpy.concatenate((nearest_neighbors, test_images[n].reshape(1, -1)), axis=0)

            pca = sklearn.decomposition.IncrementalPCA(n_components=self.args.n_pca)
            pca.fit(nearest_neighbors)

            reconstructed_test_images = pca.inverse_transform(pca.transform(test_images[n].reshape(1, -1)))
            reconstructed_perturbations = pca.inverse_transform(pca.transform(perturbations[n].reshape(1, -1)))
            reconstructed_pure_perturbations = pca.inverse_transform(pca.transform(pure_perturbations[n].reshape(1, -1)))

            self.distances['test'][n] = numpy.average(numpy.multiply(reconstructed_test_images - test_images[n], reconstructed_test_images - test_images[n]), axis=1)
            self.distances['perturbation'][n] = numpy.average(numpy.multiply(reconstructed_perturbations - perturbations[n], reconstructed_perturbations - perturbations[n]), axis=1)
            self.distances['true'][n] = numpy.average(numpy.multiply(reconstructed_pure_perturbations - pure_perturbations[n], reconstructed_pure_perturbations - pure_perturbations[n]), axis=1)

            self.angles['test'][n] = numpy.rad2deg(common.numpy.angles(reconstructed_test_images.T, test_images[n].T))
            self.angles['perturbation'][n] = numpy.rad2deg(common.numpy.angles(reconstructed_perturbations.T, perturbations[n].T))
            self.angles['true'][n] = numpy.rad2deg(common.numpy.angles(reconstructed_pure_perturbations.T, pure_perturbations[n].T))

            log('[Detection] %d: true distance=%g angle=%g' % (n, self.distances['true'][n], self.angles['true'][n]))
            log('[Detection] %d: perturbation distance=%g angle=%g' % (n, self.distances['perturbation'][n], self.angles['perturbation'][n]))
            log('[Detection] %d: test distance=%g angle=%g' % (n, self.distances['test'][n], self.angles['test'][n]))

    self.distances['test'] = self.distances['test'][success]
    self.distances['perturbation'] = self.distances['perturbation'][success]
    self.distances['true'] = self.distances['true'][success]

    # Filter the angles by success as well, mirroring the distance filtering
    # (and the behavior of compute_nn below).
    self.angles['test'] = self.angles['test'][success]
    self.angles['perturbation'] = self.angles['perturbation'][success]
    self.angles['true'] = self.angles['true'][success]
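# `common.numpy.angles` is not shown in this excerpt. Judging by its use above
# (column vectors in, one angle per column out), a minimal sketch might look as
# follows; the clipping guards arccos against round-off and is an assumption,
# not the actual implementation:
import numpy

def angles(a, b):
    """Angle (in radians) between corresponding column vectors of a and b."""
    a = a.reshape(a.shape[0], -1)
    b = b.reshape(b.shape[0], -1)
    dot = numpy.sum(a * b, axis=0)
    norms = numpy.linalg.norm(a, axis=0) * numpy.linalg.norm(b, axis=0)
    return numpy.arccos(numpy.clip(dot / numpy.maximum(norms, 1e-12), -1, 1))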
def compute_images(self):
    """
    Compute images through the decoder.
    """

    assert self.model.training is False

    num_batches = int(math.ceil(self.perturbations.shape[0] / self.args.batch_size))
    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, self.perturbations.shape[0])

        batch_fonts = self.perturbation_codes[b_start: b_end, 1]
        batch_classes = self.perturbation_codes[b_start: b_end, 2]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class)
        ), axis=1).astype(numpy.float32)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

        batch_theta = common.torch.as_variable(self.perturbation_theta[b_start: b_end], self.args.use_gpu)
        theta_images = self.model(batch_code, batch_theta)

        batch_perturbation = common.torch.as_variable(self.perturbations[b_start: b_end], self.args.use_gpu)
        perturbation_images = self.model(batch_code, batch_perturbation)

        if b % 100 == 0:
            log('[Testing] %d' % b)

        theta_images = numpy.squeeze(numpy.transpose(theta_images.cpu().detach().numpy(), (0, 2, 3, 1)))
        self.theta_images = common.numpy.concatenate(self.theta_images, theta_images)

        perturbation_images = numpy.squeeze(numpy.transpose(perturbation_images.cpu().detach().numpy(), (0, 2, 3, 1)))
        self.perturbation_images = common.numpy.concatenate(self.perturbation_images, perturbation_images)

    self.theta_images = self.theta_images.reshape((self.theta_images.shape[0], -1))
    self.perturbation_images = self.perturbation_images.reshape((self.perturbation_images.shape[0], -1))
def compute_images(self):
    """
    Compute images.
    """

    assert self.test_codes is not None

    num_batches = int(math.ceil(self.perturbations.shape[0] / self.args.batch_size))
    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, self.perturbations.shape[0])

        batch_fonts = self.test_codes[b_start: b_end, 1]
        batch_classes = self.test_codes[b_start: b_end, 2]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class)
        ), axis=1).astype(numpy.float32)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

        batch_perturbation = common.torch.as_variable(self.perturbations[b_start: b_end].astype(numpy.float32), self.args.use_gpu)
        perturbation_images = self.model(batch_code, batch_perturbation)

        if b % 100 == 0:
            log('[Testing] %d' % b)

        perturbation_images = numpy.squeeze(perturbation_images.cpu().detach().numpy())
        self.perturbation_images = common.numpy.concatenate(self.perturbation_images, perturbation_images)

    # Trick to perform the analysis on actual images of adversarial examples.
    self.perturbations = self.perturbation_images
def test(self):
    """
    Test classifier to identify valid samples to attack.
    """

    num_batches = int(math.ceil(self.test_theta.shape[0] / self.args.batch_size))
    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, self.test_theta.shape[0])

        batch_fonts = self.test_fonts[b_start: b_end]
        batch_classes = self.test_classes[b_start: b_end]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class)
        ), axis=1).astype(numpy.float32)

        batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
        batch_inputs = common.torch.as_variable(self.test_theta[b_start: b_end], self.args.use_gpu)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

        # This restricts the optimization to theta, keeping the font/class code fixed.
        self.model.decoder.set_code(batch_code)
        output_classes = self.model(batch_inputs)

        values, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
        errors = torch.abs(indices - batch_classes)
        self.accuracy = common.numpy.concatenate(self.accuracy, errors.data.cpu().numpy())

        if b % 100 == 0:
            log('[Attack] computing accuracy %d' % b)

    self.accuracy = self.accuracy == 0
    utils.write_hdf5(self.args.accuracy_file, self.accuracy)
    log('[Attack] wrote %s' % self.args.accuracy_file)

    accuracy = numpy.sum(self.accuracy) / float(self.accuracy.shape[0])
    log('[Attack] accuracy %g' % accuracy)

    accuracy = numpy.sum(self.accuracy[:self.args.max_samples]) / float(self.args.max_samples)
    log('[Attack] accuracy on %d samples %g' % (self.args.max_samples, accuracy))
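# `common.numpy.one_hot` is used to build the font/class codes throughout this
# excerpt. A minimal sketch, assuming integer labels in [0, N) (not the actual
# implementation):
import numpy

def one_hot(labels, N):
    """One-hot encode a 1-D array of integer labels into a (len(labels), N) array."""
    labels = numpy.asarray(labels).astype(int)
    encoded = numpy.zeros((labels.shape[0], N), dtype=numpy.float32)
    encoded[numpy.arange(labels.shape[0]), labels] = 1
    return encoded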
def train(self, epoch):
    """
    Train for one epoch.

    :param epoch: current epoch
    :type epoch: int
    """

    assert self.encoder is not None and self.decoder is not None
    assert self.scheduler is not None

    self.auto_encoder.train()
    log('[Training] %d set auto encoder to train' % epoch)
    self.encoder.train()
    log('[Training] %d set encoder to train' % epoch)
    self.decoder.train()
    log('[Training] %d set decoder to train' % epoch)

    num_batches = int(math.ceil(self.train_images.shape[0] / self.args.batch_size))
    assert self.encoder.training is True

    permutation = numpy.random.permutation(self.train_images.shape[0])
    permutation = numpy.concatenate((permutation, permutation[:self.args.batch_size]), axis=0)

    for b in range(num_batches):
        self.scheduler.update(epoch, float(b) / num_batches)

        perm = permutation[b * self.args.batch_size: (b + 1) * self.args.batch_size]
        batch_images = common.torch.as_variable(self.train_images[perm], self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)

        output_images, output_mu, output_logvar = self.auto_encoder(batch_images)

        reconstruction_loss = self.reconstruction_loss(batch_images, output_images)
        latent_loss = self.latent_loss(output_mu, output_logvar)
        loss = self.args.beta * reconstruction_loss + latent_loss

        self.scheduler.optimizer.zero_grad()
        loss.backward()
        self.scheduler.optimizer.step()

        reconstruction_loss = reconstruction_loss.item()
        latent_loss = latent_loss.item()

        reconstruction_error = self.reconstruction_error(batch_images, output_images)
        reconstruction_error = reconstruction_error.item()

        iteration = epoch * num_batches + b + 1
        self.train_statistics = numpy.vstack((self.train_statistics, numpy.array([
            iteration,
            iteration * self.args.batch_size,
            min(num_batches, iteration),
            min(num_batches, iteration) * self.args.batch_size,
            reconstruction_loss,
            reconstruction_error,
            latent_loss,
            torch.mean(output_mu).item(),
            torch.var(output_mu).item(),
            torch.mean(output_logvar).item(),
        ])))

        skip = 10
        if b % skip == skip // 2:
            log('[Training] %d | %d: %g (%g) %g %g %g %g' % (
                epoch,
                b,
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 4]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 5]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 6]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 7]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 8]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 9]),
            ))
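# `self.latent_loss` and `self.reconstruction_loss` are defined elsewhere. For a
# standard VAE-style objective they would be the KL divergence to a unit Gaussian
# and a squared reconstruction error; a minimal sketch under that assumption (not
# necessarily the exact losses used here):
import torch

def latent_loss(mu, logvar):
    """KL(q(z|x) || N(0, I)) for a diagonal Gaussian posterior."""
    return -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

def reconstruction_loss(batch_images, output_images):
    """Summed squared error between inputs and reconstructions."""
    return torch.sum((batch_images - output_images) ** 2)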
def compute_nn(self, inclusive=False):
    """
    Test detector.
    """

    success = numpy.logical_and(self.success >= 0, self.accuracy)
    log('[Detection] %d valid attacked samples' % numpy.sum(success))

    nearest_neighbor_images = self.nearest_neighbor_images.reshape(self.nearest_neighbor_images.shape[0], -1)
    perturbations = self.perturbations.reshape(self.perturbations.shape[0], -1)
    test_images = self.test_images.reshape(self.test_images.shape[0], -1)

    nearest_neighbors_indices = self.compute_nearest_neighbors(perturbations)
    pure_perturbations = perturbations - test_images
    log('[Detection] computed nearest neighbors for perturbations')

    self.distances['true'] = numpy.zeros((success.shape[0]))
    self.distances['test'] = numpy.zeros((success.shape[0]))
    self.distances['perturbation'] = numpy.zeros((success.shape[0]))

    self.angles['true'] = numpy.zeros((success.shape[0]))
    self.angles['test'] = numpy.zeros((success.shape[0]))
    self.angles['perturbation'] = numpy.zeros((success.shape[0]))

    for n in range(pure_perturbations.shape[0]):
        if success[n]:
            nearest_neighbors = nearest_neighbor_images[nearest_neighbors_indices[n, :]]

            if inclusive:
                nearest_neighbors = numpy.concatenate((nearest_neighbors, test_images[n].reshape(1, -1)), axis=0)
                nearest_neighbor_mean = test_images[n]
            else:
                nearest_neighbor_mean = numpy.average(nearest_neighbors, axis=0)

            nearest_neighbor_basis = nearest_neighbors - nearest_neighbor_mean

            relative_perturbation = perturbations[n] - nearest_neighbor_mean
            relative_test_image = test_images[n] - nearest_neighbor_mean

            if inclusive:
                assert numpy.allclose(relative_test_image, nearest_neighbor_basis[-1])

            nearest_neighbor_vectors = numpy.stack((
                pure_perturbations[n],
                relative_perturbation,
                relative_test_image
            ), axis=1)

            nearest_neighbor_projections = common.numpy.project_orthogonal(nearest_neighbor_basis.T, nearest_neighbor_vectors)
            assert nearest_neighbor_vectors.shape[0] == nearest_neighbor_projections.shape[0]
            assert nearest_neighbor_vectors.shape[1] == nearest_neighbor_projections.shape[1]

            angles = numpy.rad2deg(common.numpy.angles(nearest_neighbor_vectors, nearest_neighbor_projections))
            distances = numpy.linalg.norm(nearest_neighbor_vectors - nearest_neighbor_projections, ord=2, axis=0)
            assert distances.shape[0] == 3
            assert angles.shape[0] == 3

            self.distances['true'][n] = distances[0]
            self.distances['perturbation'][n] = distances[1]
            self.distances['test'][n] = distances[2]
            self.angles['true'][n] = angles[0]
            self.angles['perturbation'][n] = angles[1]
            self.angles['test'][n] = angles[2]

            log('[Detection] %d: true distance=%g angle=%g' % (n, self.distances['true'][n], self.angles['true'][n]))
            log('[Detection] %d: perturbation distance=%g angle=%g' % (n, self.distances['perturbation'][n], self.angles['perturbation'][n]))
            log('[Detection] %d: test distance=%g angle=%g' % (n, self.distances['test'][n], self.angles['test'][n]))

    self.distances['true'] = self.distances['true'][success]
    self.distances['test'] = self.distances['test'][success]
    self.distances['perturbation'] = self.distances['perturbation'][success]

    self.angles['true'] = self.angles['true'][success]
    self.angles['test'] = self.angles['test'][success]
    self.angles['perturbation'] = self.angles['perturbation'][success]

    if inclusive:
        self.distances['test'][:] = 0
        self.angles['test'][:] = 0
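# `common.numpy.project_orthogonal` projects vectors onto the subspace spanned by
# the basis columns. A minimal least-squares sketch (an assumption, not the actual
# implementation):
import numpy

def project_orthogonal(basis, vectors):
    """Orthogonally project the columns of `vectors` onto span(columns of `basis`)."""
    coefficients, _, _, _ = numpy.linalg.lstsq(basis, vectors, rcond=None)
    return basis.dot(coefficients)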
def attack(self):
    """
    Attack the model.
    """

    assert self.model is not None
    assert self.model.classifier.training is False

    concatenate_axis = -1
    if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
        self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
        assert len(self.original_perturbations.shape) == 3, self.original_perturbations.shape
        log('[Attack] read %s' % self.args.perturbations_file)

        self.original_success = utils.read_hdf5(self.args.success_file)
        log('[Attack] read %s' % self.args.success_file)

        assert self.original_perturbations.shape[0] == self.original_success.shape[0]
        assert self.original_perturbations.shape[1] == self.original_success.shape[1]
        assert self.original_perturbations.shape[2] == self.test_theta.shape[1]

        if self.original_perturbations.shape[1] >= self.args.max_samples and self.original_perturbations.shape[0] >= self.args.max_attempts:
            log('[Attack] found %d attempts, %d samples, requested no more' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
            return
        elif self.original_perturbations.shape[0] == self.args.max_attempts or self.original_perturbations.shape[1] == self.args.max_samples:
            if self.original_perturbations.shape[0] == self.args.max_attempts:
                self.test_theta = self.test_theta[self.original_perturbations.shape[1]:]
                self.test_fonts = self.test_fonts[self.original_perturbations.shape[1]:]
                self.test_classes = self.test_classes[self.original_perturbations.shape[1]:]
                self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
                concatenate_axis = 1
                log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (
                    self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_samples))
            elif self.original_perturbations.shape[1] == self.args.max_samples:
                self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
                concatenate_axis = 0
                log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (
                    self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_attempts))

    self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_theta.shape[1]))
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)
    else:
        batch_size = 1

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples / batch_size))

    for i in range(num_batches):
        if i * batch_size == self.args.max_samples:
            break

        i_start = i * batch_size
        i_end = min((i + 1) * batch_size, self.args.max_samples)

        batch_fonts = self.test_fonts[i_start: i_end]
        batch_classes = self.test_classes[i_start: i_end]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class)
        ), axis=1).astype(numpy.float32)

        batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
        batch_inputs = common.torch.as_variable(self.test_theta[i_start: i_end], self.args.use_gpu)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

        # This restricts the optimization to theta, keeping the font/class code fixed.
        self.model.decoder.set_code(batch_code)

        t = 0
        while t < self.args.max_attempts:
            attack = self.setup_attack(batch_inputs, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            assert not numpy.any(perturbations != perturbations), perturbations

            # Note that we save the perturbed theta, not only the perturbation!
            # The reshape handles the case of a one-dimensional latent space.
            perturbations = perturbations.reshape(batch_inputs.size())
            self.perturbations[t][i_start: i_end] = perturbations + batch_inputs.cpu().numpy()
            self.success[t][i_start: i_end] = success
            t += 1

        log('[Attack] %d: completed' % i)

    if concatenate_axis >= 0:
        if self.perturbations.shape[0] == self.args.max_attempts:
            self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
            self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
            log('[Attack] concatenated')

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)

    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)
def compute_true(self):
    """
    Compute true distances by projecting perturbations onto the decoder manifold.
    """

    assert self.test_codes is not None

    num_batches = int(math.ceil(self.perturbations.shape[0] / self.args.batch_size))
    params = {
        'lr': 0.09,
        'lr_decay': 0.95,
        'lr_min': 0.0000001,
        'weight_decay': 0,
    }

    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, self.perturbations.shape[0])

        batch_fonts = self.test_codes[b_start: b_end, 1]
        batch_classes = self.test_codes[b_start: b_end, 2]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class)
        ), axis=1).astype(numpy.float32)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

        batch_images = common.torch.as_variable(self.test_images[b_start: b_end], self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)

        batch_theta = common.torch.as_variable(self.test_theta[b_start: b_end].astype(numpy.float32), self.args.use_gpu, True)
        batch_perturbation = common.torch.as_variable(self.perturbations[b_start: b_end].astype(numpy.float32), self.args.use_gpu)

        self.model.set_code(batch_code)

        # Optimize theta so that the decoder output matches the perturbation as
        # closely as possible; the remaining error is the distance to the manifold.
        batch_theta = torch.nn.Parameter(batch_theta)
        scheduler = ADAMScheduler([batch_theta], **params)

        log('[Detection] %d: start' % b)
        for t in range(100):
            scheduler.update(t // 10, float(t) / 10)
            scheduler.optimizer.zero_grad()

            output_perturbation = self.model.forward(batch_theta)
            error = torch.mean(torch.mul(output_perturbation - batch_perturbation, output_perturbation - batch_perturbation))
            test_error = torch.mean(torch.mul(output_perturbation - batch_images, output_perturbation - batch_images))

            error.backward()
            scheduler.optimizer.step()
            log('[Detection] %d: %d = %g, %g' % (b, t, error.item(), test_error.item()))

        output_perturbation = numpy.squeeze(numpy.transpose(output_perturbation.cpu().detach().numpy(), (0, 2, 3, 1)))
        self.projected_perturbations = common.numpy.concatenate(self.projected_perturbations, output_perturbation)

    projected_perturbations = self.projected_perturbations.reshape((self.projected_perturbations.shape[0], -1))
    perturbations = self.perturbations.reshape((self.perturbations.shape[0], -1))

    success = numpy.logical_and(self.success >= 0, self.accuracy)
    log('[Detection] %d valid attacked samples' % numpy.sum(success))

    self.distances['true'] = numpy.linalg.norm(perturbations - projected_perturbations, ord=2, axis=1)
    self.angles['true'] = numpy.rad2deg(common.numpy.angles(perturbations.T, projected_perturbations.T))

    self.distances['true'] = self.distances['true'][success]
    self.angles['true'] = self.angles['true'][success]

    self.distances['test'] = numpy.zeros((numpy.sum(success)))
    self.angles['test'] = numpy.zeros((numpy.sum(success)))
def load_data_and_model(self):
    """
    Load data and model.
    """

    database = utils.read_hdf5(self.args.database_file).astype(numpy.float32)
    log('[Visualization] read %s' % self.args.database_file)

    N_font = database.shape[0]
    N_class = database.shape[1]
    resolution = database.shape[2]

    database = database.reshape((database.shape[0] * database.shape[1], database.shape[2], database.shape[3]))
    database = torch.from_numpy(database)
    if self.args.use_gpu:
        database = database.cuda()
    database = torch.autograd.Variable(database, False)

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
    if len(self.test_images.shape) < 4:
        self.test_images = numpy.expand_dims(self.test_images, axis=3)

    self.perturbations = utils.read_hdf5(self.args.perturbations_file).astype(numpy.float32)
    self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    log('[Visualization] read %s' % self.args.perturbations_file)

    self.success = utils.read_hdf5(self.args.success_file)
    self.success = numpy.swapaxes(self.success, 0, 1)
    log('[Visualization] read %s' % self.args.success_file)

    self.accuracy = utils.read_hdf5(self.args.accuracy_file)
    log('[Visualization] read %s' % self.args.accuracy_file)

    self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(numpy.float32)
    self.test_theta = self.test_theta[:self.perturbations.shape[0]]
    N_theta = self.test_theta.shape[1]
    log('[Visualization] using %d N_theta' % N_theta)
    log('[Visualization] read %s' % self.args.test_theta_file)

    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(numpy.int)
    self.test_codes = self.test_codes[:self.perturbations.shape[0]]
    self.test_codes = self.test_codes[:, 1:3]
    self.test_codes = numpy.concatenate((
        common.numpy.one_hot(self.test_codes[:, 0], N_font),
        common.numpy.one_hot(self.test_codes[:, 1], N_class)
    ), axis=1).astype(numpy.float32)
    log('[Visualization] read %s' % self.args.test_codes_file)

    image_channels = 1 if N_theta <= 7 else 3
    network_units = list(map(int, self.args.network_units.split(',')))
    log('[Visualization] using %d input channels' % image_channels)

    self.classifier = models.Classifier(
        N_class,
        resolution=(image_channels, resolution, resolution),
        architecture=self.args.network_architecture,
        activation=self.args.network_activation,
        batch_normalization=not self.args.network_no_batch_normalization,
        start_channels=self.args.network_channels,
        dropout=self.args.network_dropout,
        units=network_units)
    self.decoder = models.AlternativeOneHotDecoder(database, N_font, N_class, N_theta)
    self.decoder.eval()

    assert os.path.exists(self.args.classifier_file), 'state file %s not found' % self.args.classifier_file
    state = State.load(self.args.classifier_file)
    log('[Visualization] read %s' % self.args.classifier_file)

    self.classifier.load_state_dict(state.model)
    if self.args.use_gpu and not cuda.is_cuda(self.classifier):
        log('[Visualization] classifier is not CUDA')
        self.classifier = self.classifier.cuda()
    log('[Visualization] loaded classifier')

    self.classifier.eval()
    log('[Visualization] set classifier to eval')
def train(self):
    """
    Train with fair data augmentation.
    """

    self.model.train()
    assert self.model.training is True

    split = self.args.batch_size // 2
    num_batches = int(math.ceil(self.train_images.shape[0] / self.args.batch_size))
    permutation = numpy.random.permutation(self.train_images.shape[0])

    for b in range(num_batches):
        self.scheduler.update(self.epoch, float(b) / num_batches)

        perm = numpy.take(permutation, range(b * self.args.batch_size, (b + 1) * self.args.batch_size), mode='wrap')
        batch_images = common.torch.as_variable(self.train_images[perm], self.args.use_gpu)
        batch_theta = common.torch.as_variable(self.train_theta[perm], self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)

        batch_fonts = self.train_codes[perm, 1]
        batch_classes = self.train_codes[perm, self.args.label_index]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class)
        ), axis=1).astype(numpy.float32)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)
        batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)

        loss = error = gradient = 0
        if self.args.full_variant:
            for t in range(self.args.max_iterations):
                if self.args.strong_variant:
                    # Here, we want to uniformly sample all allowed transformations, so that's OK.
                    min_bound = numpy.repeat(self.min_bound.reshape(1, -1), self.args.batch_size, axis=0)
                    max_bound = numpy.repeat(self.max_bound.reshape(1, -1), self.args.batch_size, axis=0)
                    random = numpy.random.uniform(min_bound, max_bound, (batch_theta.size(0), batch_theta.size(1)))

                    batch_perturbed_theta = common.torch.as_variable(random.astype(numpy.float32), self.args.use_gpu)

                    self.decoder.set_code(batch_code)
                    batch_perturbed_images = self.decoder(batch_perturbed_theta)
                else:
                    random = common.numpy.uniform_ball(batch_theta.size(0), batch_theta.size(1), epsilon=self.args.epsilon, ord=self.norm)
                    batch_perturbed_theta = batch_theta + common.torch.as_variable(random.astype(numpy.float32), self.args.use_gpu)
                    batch_perturbed_theta = torch.min(common.torch.as_variable(self.max_bound, self.args.use_gpu), batch_perturbed_theta)
                    batch_perturbed_theta = torch.max(common.torch.as_variable(self.min_bound, self.args.use_gpu), batch_perturbed_theta)

                    self.decoder.set_code(batch_code)
                    batch_perturbed_images = self.decoder(batch_perturbed_theta)

                output_classes = self.model(batch_perturbed_images)

                self.scheduler.optimizer.zero_grad()
                l = self.loss(batch_classes, output_classes)
                l.backward()
                self.scheduler.optimizer.step()
                loss += l.item()

                g = torch.mean(torch.abs(list(self.model.parameters())[0].grad))
                gradient += g.item()

                e = self.error(batch_classes, output_classes)
                error += e.item()

            batch_perturbations = batch_perturbed_images - batch_images
            gradient /= self.args.max_iterations
            loss /= self.args.max_iterations
            error /= self.args.max_iterations
            perturbation_loss = loss
            perturbation_error = error
        else:
            output_classes = self.model(batch_images[:split])

            self.scheduler.optimizer.zero_grad()
            l = self.loss(batch_classes[:split], output_classes)
            l.backward()
            self.scheduler.optimizer.step()
            loss = l.item()

            gradient = torch.mean(torch.abs(list(self.model.parameters())[0].grad))
            gradient = gradient.item()

            e = self.error(batch_classes[:split], output_classes)
            error = e.item()

            perturbation_loss = perturbation_error = 0
            for t in range(self.args.max_iterations):
                if self.args.strong_variant:
                    # Again, sampling all possible transformations.
                    min_bound = numpy.repeat(self.min_bound.reshape(1, -1), split, axis=0)
                    max_bound = numpy.repeat(self.max_bound.reshape(1, -1), split, axis=0)
                    random = numpy.random.uniform(min_bound, max_bound, (split, batch_theta.size(1)))

                    batch_perturbed_theta = common.torch.as_variable(random.astype(numpy.float32), self.args.use_gpu)

                    self.decoder.set_code(batch_code[split:])
                    batch_perturbed_images = self.decoder(batch_perturbed_theta)
                else:
                    random = common.numpy.uniform_ball(split, batch_theta.size(1), epsilon=self.args.epsilon, ord=self.norm)
                    batch_perturbed_theta = batch_theta[split:] + common.torch.as_variable(random.astype(numpy.float32), self.args.use_gpu)
                    batch_perturbed_theta = torch.min(common.torch.as_variable(self.max_bound, self.args.use_gpu), batch_perturbed_theta)
                    batch_perturbed_theta = torch.max(common.torch.as_variable(self.min_bound, self.args.use_gpu), batch_perturbed_theta)

                    self.decoder.set_code(batch_code[split:])
                    batch_perturbed_images = self.decoder(batch_perturbed_theta)

                output_classes = self.model(batch_perturbed_images)

                self.scheduler.optimizer.zero_grad()
                l = self.loss(batch_classes[split:], output_classes)
                l.backward()
                self.scheduler.optimizer.step()
                perturbation_loss += l.item()

                g = torch.mean(torch.abs(list(self.model.parameters())[0].grad))
                gradient += g.item()

                e = self.error(batch_classes[split:], output_classes)
                perturbation_error += e.item()

            batch_perturbations = batch_perturbed_images - batch_images[split:]
            gradient /= self.args.max_iterations + 1
            perturbation_loss /= self.args.max_iterations
            perturbation_error /= self.args.max_iterations

        iteration = self.epoch * num_batches + b + 1
        self.train_statistics = numpy.vstack((self.train_statistics, numpy.array([[
            iteration,  # iterations
            iteration * (1 + self.args.max_iterations) * self.args.batch_size,  # samples seen
            min(num_batches, iteration) * self.args.batch_size + iteration * self.args.max_iterations * self.args.batch_size,  # unique samples seen
            loss,
            error,
            perturbation_loss,
            perturbation_error,
            gradient
        ]])))

        if b % self.args.skip == self.args.skip // 2:
            log('[Training] %d | %d: %g (%g) %g (%g) [%g]' % (
                self.epoch,
                b,
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 3]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 4]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 5]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 6]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, -1]),
            ))

    self.debug('clean.%d.png' % self.epoch, batch_images.permute(0, 2, 3, 1))
    self.debug('perturbed.%d.png' % self.epoch, batch_perturbed_images.permute(0, 2, 3, 1))
    self.debug('perturbation.%d.png' % self.epoch, batch_perturbations.permute(0, 2, 3, 1), cmap='seismic')
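# `common.numpy.uniform_ball` draws the random theta-perturbations above. A minimal
# L2-only sketch (the `ord` handling and exact scheme are assumptions, not the
# actual implementation): sample a direction uniformly on the sphere and a radius
# distributed as r^(dim-1), which together give a uniform sample in the ball.
import numpy

def uniform_ball(batch_size, dim, epsilon=1, ord=2):
    """Sample batch_size points uniformly from an L2 epsilon-ball in dim dimensions."""
    assert ord == 2, 'this sketch only covers the L2 case'
    directions = numpy.random.randn(batch_size, dim)
    directions /= numpy.linalg.norm(directions, axis=1, keepdims=True)
    radii = numpy.random.uniform(0, 1, (batch_size, 1)) ** (1.0 / dim)
    return epsilon * radii * directions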
def train(self):
    """
    Train adversarially.
    """

    num_batches = int(math.ceil(self.train_images.shape[0] / self.args.batch_size))
    permutation = numpy.random.permutation(self.train_images.shape[0])
    perturbation_permutation = numpy.random.permutation(self.train_images.shape[0])
    if self.args.safe:
        perturbation_permutation = perturbation_permutation[self.train_valid == 1]
    else:
        perturbation_permutation = permutation

    for b in range(num_batches):
        self.scheduler.update(self.epoch, float(b) / num_batches)

        self.model.eval()
        assert self.model.training is False

        objective = self.objective_class()
        split = self.args.batch_size // 2

        if self.args.full_variant:
            perm = numpy.concatenate((
                numpy.take(permutation, range(b * self.args.batch_size, b * self.args.batch_size + split), mode='wrap'),
                numpy.take(perturbation_permutation, range(b * self.args.batch_size + split, (b + 1) * self.args.batch_size), mode='wrap')
            ), axis=0)
            batch_images = common.torch.as_variable(self.train_images[perm], self.args.use_gpu)
            batch_classes = common.torch.as_variable(self.train_codes[perm], self.args.use_gpu)
            batch_theta = common.torch.as_variable(self.train_theta[perm], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            attack = self.setup_attack(self.model, batch_images[:split], batch_classes[:split])
            success, perturbations, _, _, _ = attack.run(objective, self.args.verbose)
            batch_perturbations1 = common.torch.as_variable(perturbations.astype(numpy.float32), self.args.use_gpu)
            batch_perturbed_images1 = batch_images[:split] + batch_perturbations1

            if isinstance(self.decoder, models.SelectiveDecoder):
                self.decoder.set_code(batch_classes[split:])
            attack = self.setup_decoder_attack(self.decoder_classifier, batch_theta[split:], batch_classes[split:])
            attack.set_bound(torch.from_numpy(self.min_bound), torch.from_numpy(self.max_bound))
            decoder_success, decoder_perturbations, probabilities, norm, _ = attack.run(objective, self.args.verbose)

            batch_perturbed_theta = batch_theta[split:] + common.torch.as_variable(decoder_perturbations, self.args.use_gpu)
            batch_perturbed_images2 = self.decoder(batch_perturbed_theta)
            batch_perturbations2 = batch_perturbed_images2 - batch_images[split:]

            batch_input_images = torch.cat((batch_perturbed_images1, batch_perturbed_images2), dim=0)

            self.model.train()
            assert self.model.training is True

            output_classes = self.model(batch_input_images)

            self.scheduler.optimizer.zero_grad()
            perturbation_loss = self.loss(batch_classes[:split], output_classes[:split])
            decoder_perturbation_loss = self.loss(batch_classes[split:], output_classes[split:])
            loss = (perturbation_loss + decoder_perturbation_loss) / 2
            loss.backward()
            self.scheduler.optimizer.step()

            loss = loss.item()
            perturbation_loss = perturbation_loss.item()
            decoder_perturbation_loss = decoder_perturbation_loss.item()

            gradient = torch.mean(torch.abs(list(self.model.parameters())[0].grad))
            gradient = gradient.item()

            perturbation_error = self.error(batch_classes[:split], output_classes[:split])
            perturbation_error = perturbation_error.item()

            decoder_perturbation_error = self.error(batch_classes[split:], output_classes[split:])
            decoder_perturbation_error = decoder_perturbation_error.item()

            error = (perturbation_error + decoder_perturbation_error) / 2
        else:
            perm = numpy.concatenate((
                numpy.take(perturbation_permutation, range(b * self.args.batch_size + split + split // 2, (b + 1) * self.args.batch_size), mode='wrap'),
                numpy.take(permutation, range(b * self.args.batch_size, b * self.args.batch_size + split + split // 2), mode='wrap'),
            ), axis=0)
            batch_images = common.torch.as_variable(self.train_images[perm], self.args.use_gpu)
            batch_classes = common.torch.as_variable(self.train_codes[perm], self.args.use_gpu)
            batch_theta = common.torch.as_variable(self.train_theta[perm], self.args.use_gpu)
            batch_images = batch_images.permute(0, 3, 1, 2)

            attack = self.setup_attack(self.model, batch_images[split // 2:split], batch_classes[split // 2:split])
            success, perturbations, _, _, _ = attack.run(objective, self.args.verbose)
            batch_perturbations1 = common.torch.as_variable(perturbations.astype(numpy.float32), self.args.use_gpu)
            batch_perturbed_images1 = batch_images[split // 2:split] + batch_perturbations1

            if isinstance(self.decoder, models.SelectiveDecoder):
                self.decoder.set_code(batch_classes[:split // 2])
            attack = self.setup_decoder_attack(self.decoder_classifier, batch_theta[:split // 2], batch_classes[:split // 2])
            attack.set_bound(torch.from_numpy(self.min_bound), torch.from_numpy(self.max_bound))
            decoder_success, decoder_perturbations, probabilities, norm, _ = attack.run(objective, self.args.verbose)

            batch_perturbed_theta = batch_theta[:split // 2] + common.torch.as_variable(decoder_perturbations, self.args.use_gpu)
            batch_perturbed_images2 = self.decoder(batch_perturbed_theta)
            batch_perturbations2 = batch_perturbed_images2 - batch_images[:split // 2]

            batch_input_images = torch.cat((batch_perturbed_images2, batch_perturbed_images1, batch_images[split:]), dim=0)

            self.model.train()
            assert self.model.training is True

            output_classes = self.model(batch_input_images)

            self.scheduler.optimizer.zero_grad()
            loss = self.loss(batch_classes[split:], output_classes[split:])
            perturbation_loss = self.loss(batch_classes[split // 2:split], output_classes[split // 2:split])
            decoder_perturbation_loss = self.loss(batch_classes[:split // 2], output_classes[:split // 2])
            l = (loss + perturbation_loss + decoder_perturbation_loss) / 3
            l.backward()
            self.scheduler.optimizer.step()

            loss = loss.item()
            perturbation_loss = perturbation_loss.item()
            decoder_perturbation_loss = decoder_perturbation_loss.item()

            gradient = torch.mean(torch.abs(list(self.model.parameters())[0].grad))
            gradient = gradient.item()

            error = self.error(batch_classes[split:], output_classes[split:])
            error = error.item()

            perturbation_error = self.error(batch_classes[split // 2:split], output_classes[split // 2:split])
            perturbation_error = perturbation_error.item()

            decoder_perturbation_error = self.error(batch_classes[:split // 2], output_classes[:split // 2])
            decoder_perturbation_error = decoder_perturbation_error.item()

        iterations = numpy.mean(success[success >= 0]) if numpy.sum(success >= 0) > 0 else -1
        norm = numpy.mean(numpy.linalg.norm(perturbations.reshape(perturbations.shape[0], -1), axis=1, ord=self.norm))
        success = numpy.sum(success >= 0) / self.args.batch_size

        decoder_iterations = numpy.mean(decoder_success[decoder_success >= 0]) if numpy.sum(decoder_success >= 0) > 0 else -1
        decoder_norm = numpy.mean(numpy.linalg.norm(decoder_perturbations, axis=1, ord=self.norm))
        decoder_success = numpy.sum(decoder_success >= 0) / self.args.batch_size

        iteration = self.epoch * num_batches + b + 1
        self.train_statistics = numpy.vstack((self.train_statistics, numpy.array([[
            iteration,  # iterations
            iteration * (1 + self.args.max_iterations) * self.args.batch_size,  # samples seen
            min(num_batches, iteration) * self.args.batch_size + iteration * self.args.max_iterations * self.args.batch_size,  # unique samples seen
            loss,
            error,
            perturbation_loss,
            perturbation_error,
            decoder_perturbation_loss,
            decoder_perturbation_error,
            success,
            iterations,
            norm,
            decoder_success,
            decoder_iterations,
            decoder_norm,
            gradient
        ]])))

        if b % self.args.skip == self.args.skip // 2:
            log('[Training] %d | %d: %g (%g) %g (%g) %g (%g) [%g]' % (
                self.epoch,
                b,
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 3]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 4]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 5]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 6]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 7]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 8]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, -1]),
            ))
            log('[Training] %d | %d: %g (%g, %g) %g (%g, %g)' % (
                self.epoch,
                b,
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 9]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 10]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 11]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 12]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 13]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 14]),
            ))

    self.debug('clean.%d.png' % self.epoch, batch_images.permute(0, 2, 3, 1))
    self.debug('perturbed.%d.png' % self.epoch, batch_perturbed_images1.permute(0, 2, 3, 1))
    self.debug('perturbed2.%d.png' % self.epoch, batch_perturbed_images2.permute(0, 2, 3, 1))
    self.debug('perturbation.%d.png' % self.epoch, batch_perturbations1.permute(0, 2, 3, 1), cmap='seismic')
    self.debug('perturbation2.%d.png' % self.epoch, batch_perturbations2.permute(0, 2, 3, 1), cmap='seismic')
def test(self):
    """
    Test the model.
    """

    self.model.eval()
    log('[Training] %d set classifier to eval' % self.epoch)
    assert self.model.training is False

    loss = error = perturbation_loss = perturbation_error = success = iterations = norm = 0

    num_batches = int(math.ceil(self.args.test_samples / self.args.batch_size))
    for b in range(num_batches):
        perm = numpy.take(range(self.args.test_samples), range(b * self.args.batch_size, (b + 1) * self.args.batch_size), mode='clip')
        batch_images = common.torch.as_variable(self.test_images[perm], self.args.use_gpu)
        batch_classes = common.torch.as_variable(self.test_codes[perm, self.args.label_index], self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)

        output_classes = self.model(batch_images)
        e = self.loss(batch_classes, output_classes)
        loss += e.item()
        a = self.error(batch_classes, output_classes)
        error += a.item()

    loss /= num_batches
    error /= num_batches

    num_batches = int(math.ceil(self.args.attack_samples / self.args.batch_size))
    assert self.args.attack_samples > 0 and self.args.attack_samples <= self.test_images.shape[0]

    for b in range(num_batches):
        perm = numpy.take(range(self.args.attack_samples), range(b * self.args.batch_size, (b + 1) * self.args.batch_size), mode='clip')
        batch_theta = common.torch.as_variable(self.test_theta[perm], self.args.use_gpu)

        batch_fonts = self.test_codes[perm, 1]
        batch_classes = self.test_codes[perm, self.args.label_index]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class)
        ), axis=1).astype(numpy.float32)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)
        batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)

        objective = self.objective_class()
        self.decoder.set_code(batch_code)
        attack = self.setup_attack(self.decoder_classifier, batch_theta, batch_classes)
        attack.set_bound(torch.from_numpy(self.min_bound), torch.from_numpy(self.max_bound))
        s, p, _, _, _ = attack.run(objective, False)

        batch_perturbed_theta = batch_theta + common.torch.as_variable(p, self.args.use_gpu)
        batch_perturbed_images = self.decoder(batch_perturbed_theta)

        output_classes = self.model(batch_perturbed_images)
        e = self.loss(batch_classes, output_classes)
        perturbation_loss += e.item()
        e = self.error(batch_classes, output_classes)
        perturbation_error += e.item()

        iterations += numpy.mean(s[s >= 0]) if numpy.sum(s >= 0) > 0 else -1
        norm += numpy.mean(numpy.linalg.norm(p.reshape(p.shape[0], -1), axis=1, ord=self.norm))
        success += numpy.sum(s >= 0) / self.args.batch_size

    perturbation_loss /= num_batches
    perturbation_error /= num_batches
    iterations /= num_batches
    norm /= num_batches
    success /= num_batches
    log('[Training] %d: test %g (%g) %g (%g)' % (self.epoch, loss, error, perturbation_loss, perturbation_error))
    log('[Training] %d: test %g (%g, %g)' % (self.epoch, success, iterations, norm))

    num_batches = int(math.ceil(self.train_images.shape[0] / self.args.batch_size))
    iteration = self.epoch * num_batches
    self.test_statistics = numpy.vstack((self.test_statistics, numpy.array([[
        iteration,  # iterations
        iteration * (1 + self.args.max_iterations) * self.args.batch_size,  # samples seen
        min(num_batches, iteration) * self.args.batch_size + iteration * self.args.max_iterations * self.args.batch_size,  # unique samples seen
        loss,
        error,
        perturbation_loss,
        perturbation_error,
        success,
        iterations,
        norm
    ]])))
def train(self):
    """
    Train adversarially.
    """

    split = self.args.batch_size // 2
    num_batches = int(math.ceil(self.train_images.shape[0] / self.args.batch_size))
    permutation = numpy.random.permutation(self.train_images.shape[0])

    for b in range(num_batches):
        self.scheduler.update(self.epoch, float(b) / num_batches)

        perm = numpy.take(permutation, range(b * self.args.batch_size, (b + 1) * self.args.batch_size), mode='wrap')
        batch_images = common.torch.as_variable(self.train_images[perm], self.args.use_gpu)
        batch_theta = common.torch.as_variable(self.train_theta[perm], self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)

        batch_fonts = self.train_codes[perm, 1]
        batch_classes = self.train_codes[perm, self.args.label_index]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class)
        ), axis=1).astype(numpy.float32)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)
        batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)

        self.model.eval()
        assert self.model.training is False

        if self.args.full_variant:
            objective = self.objective_class()
            self.decoder.set_code(batch_code)
            attack = self.setup_attack(self.decoder_classifier, batch_theta, batch_classes)
            attack.set_bound(torch.from_numpy(self.min_bound), torch.from_numpy(self.max_bound))
            success, perturbations, probabilities, norm, _ = attack.run(objective, self.args.verbose)

            batch_perturbed_theta = batch_theta + common.torch.as_variable(perturbations, self.args.use_gpu)
            batch_perturbed_images = self.decoder(batch_perturbed_theta)
            batch_perturbations = batch_perturbed_images - batch_images

            self.model.train()
            assert self.model.training is True

            output_classes = self.model(batch_perturbed_images)

            self.scheduler.optimizer.zero_grad()
            loss = self.loss(batch_classes, output_classes)
            loss.backward()
            self.scheduler.optimizer.step()
            loss = perturbation_loss = loss.item()

            gradient = torch.mean(torch.abs(list(self.model.parameters())[0].grad))
            gradient = gradient.item()

            error = self.error(batch_classes, output_classes)
            error = perturbation_error = error.item()
        else:
            objective = self.objective_class()
            self.decoder.set_code(batch_code[split:])
            attack = self.setup_attack(self.decoder_classifier, batch_theta[split:], batch_classes[split:])
            attack.set_bound(torch.from_numpy(self.min_bound), torch.from_numpy(self.max_bound))
            success, perturbations, probabilities, norm, _ = attack.run(objective, self.args.verbose)

            batch_perturbed_theta = batch_theta[split:] + common.torch.as_variable(perturbations, self.args.use_gpu)
            batch_perturbed_images = self.decoder(batch_perturbed_theta)
            batch_perturbations = batch_perturbed_images - batch_images[split:]

            self.model.train()
            assert self.model.training is True

            batch_input_images = torch.cat((batch_images[:split], batch_perturbed_images), dim=0)
            output_classes = self.model(batch_input_images)

            self.scheduler.optimizer.zero_grad()
            loss = self.loss(batch_classes[:split], output_classes[:split])
            perturbation_loss = self.loss(batch_classes[split:], output_classes[split:])
            l = (loss + perturbation_loss) / 2
            l.backward()
            self.scheduler.optimizer.step()

            loss = loss.item()
            perturbation_loss = perturbation_loss.item()

            gradient = torch.mean(torch.abs(list(self.model.parameters())[0].grad))
            gradient = gradient.item()

            error = self.error(batch_classes[:split], output_classes[:split])
            error = error.item()

            perturbation_error = self.error(batch_classes[split:], output_classes[split:])
            perturbation_error = perturbation_error.item()

        iterations = numpy.mean(success[success >= 0]) if numpy.sum(success >= 0) > 0 else -1
        norm = numpy.mean(numpy.linalg.norm(perturbations.reshape(perturbations.shape[0], -1), axis=1, ord=self.norm))
        success = numpy.sum(success >= 0) / (self.args.batch_size // 2)

        iteration = self.epoch * num_batches + b + 1
        self.train_statistics = numpy.vstack((self.train_statistics, numpy.array([[
            iteration,  # iterations
            iteration * (1 + self.args.max_iterations) * self.args.batch_size,  # samples seen
            min(num_batches, iteration) * self.args.batch_size + iteration * self.args.max_iterations * self.args.batch_size,  # unique samples seen
            loss,
            error,
            perturbation_loss,
            perturbation_error,
            success,
            iterations,
            norm,
            gradient
        ]])))

        if b % self.args.skip == self.args.skip // 2:
            log('[Training] %d | %d: %g (%g) %g (%g) [%g]' % (
                self.epoch,
                b,
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 3]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 4]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 5]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 6]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, -1]),
            ))
            log('[Training] %d | %d: %g (%g, %g)' % (
                self.epoch,
                b,
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 7]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 8]),
                numpy.mean(self.train_statistics[max(0, iteration - self.args.skip):iteration, 9]),
            ))

    self.debug('clean.%d.png' % self.epoch, batch_images.permute(0, 2, 3, 1))
    self.debug('perturbed.%d.png' % self.epoch, batch_perturbed_images.permute(0, 2, 3, 1))
    self.debug('perturbation.%d.png' % self.epoch, batch_perturbations.permute(0, 2, 3, 1), cmap='seismic')
def test(self):
    """
    Test the model.
    """

    self.model.eval()
    log('[Training] %d set classifier to eval' % self.epoch)
    assert self.model.training is False

    loss = error = perturbation_loss = perturbation_error = 0
    num_batches = int(math.ceil(self.args.test_samples / self.args.batch_size))

    for b in range(num_batches):
        perm = numpy.take(range(self.args.test_samples), range(b * self.args.batch_size, (b + 1) * self.args.batch_size), mode='clip')
        batch_images = common.torch.as_variable(self.test_images[perm], self.args.use_gpu)
        batch_theta = common.torch.as_variable(self.test_theta[perm], self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)

        batch_fonts = self.test_codes[perm, 1]
        batch_classes = self.test_codes[perm, self.args.label_index]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class)
        ), axis=1).astype(numpy.float32)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)
        batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)

        output_classes = self.model(batch_images)
        e = self.loss(batch_classes, output_classes)
        loss += e.item()
        a = self.error(batch_classes, output_classes)
        error += a.item()

        if self.args.strong_variant:
            min_bound = numpy.repeat(self.min_bound.reshape(1, -1), batch_theta.size(0), axis=0)
            max_bound = numpy.repeat(self.max_bound.reshape(1, -1), batch_theta.size(0), axis=0)
            random = numpy.random.uniform(min_bound, max_bound, (batch_theta.size(0), batch_theta.size(1)))

            batch_perturbed_theta = common.torch.as_variable(random.astype(numpy.float32), self.args.use_gpu)

            self.decoder.set_code(batch_code)
            batch_perturbed_images = self.decoder(batch_perturbed_theta)
        else:
            random = common.numpy.uniform_ball(batch_theta.size(0), batch_theta.size(1), epsilon=self.args.epsilon, ord=self.norm)
            batch_perturbed_theta = batch_theta + common.torch.as_variable(random.astype(numpy.float32), self.args.use_gpu)
            batch_perturbed_theta = torch.min(common.torch.as_variable(self.max_bound, self.args.use_gpu), batch_perturbed_theta)
            batch_perturbed_theta = torch.max(common.torch.as_variable(self.min_bound, self.args.use_gpu), batch_perturbed_theta)

            self.decoder.set_code(batch_code)
            batch_perturbed_images = self.decoder(batch_perturbed_theta)

        output_classes = self.model(batch_perturbed_images)
        l = self.loss(batch_classes, output_classes)
        perturbation_loss += l.item()
        e = self.error(batch_classes, output_classes)
        perturbation_error += e.item()

    loss /= num_batches
    error /= num_batches
    perturbation_loss /= num_batches
    perturbation_error /= num_batches
    log('[Training] %d: test %g (%g) %g (%g)' % (self.epoch, loss, error, perturbation_loss, perturbation_error))

    num_batches = int(math.ceil(self.train_images.shape[0] / self.args.batch_size))
    iteration = self.epoch * num_batches
    self.test_statistics = numpy.vstack((self.test_statistics, numpy.array([[
        iteration,  # iterations
        iteration * (1 + self.args.max_iterations) * self.args.batch_size,  # samples seen
        min(num_batches, iteration) * self.args.batch_size + iteration * self.args.max_iterations * self.args.batch_size,  # unique samples seen
        loss,
        error,
        perturbation_loss,
        perturbation_error
    ]])))
def train(self, epoch):
    """
    Train for one epoch.

    :param epoch: current epoch
    :type epoch: int
    """

    self.encoder.train()
    log('[Training] %d set encoder to train' % epoch)
    self.decoder.train()
    log('[Training] %d set decoder to train' % epoch)
    self.classifier.train()
    log('[Training] %d set classifier to train' % epoch)

    num_batches = int(math.ceil(self.train_images.shape[0] / self.args.batch_size))
    assert self.encoder.training is True

    # Pad the permutation by one batch so the last (partial) batch can be filled.
    permutation = numpy.random.permutation(self.train_images.shape[0])
    permutation = numpy.concatenate((permutation, permutation[:self.args.batch_size]), axis=0)

    for b in range(num_batches):
        self.encoder_scheduler.update(epoch, float(b) / num_batches)
        self.decoder_scheduler.update(epoch, float(b) / num_batches)
        self.classifier_scheduler.update(epoch, float(b) / num_batches)

        perm = permutation[b * self.args.batch_size:(b + 1) * self.args.batch_size]
        batch_images = common.torch.as_variable(self.train_images[perm], self.args.use_gpu, True)
        batch_images = batch_images.permute(0, 3, 1, 2)

        output_mu, output_logvar = self.encoder(batch_images)
        output_codes = self.reparameterize(output_mu, output_logvar)
        output_images = self.decoder(output_codes)
        output_real_classes = self.classifier(batch_images)
        output_reconstructed_classes = self.classifier(output_images)

        latent_loss = self.latent_loss(output_mu, output_logvar)
        reconstruction_loss = self.reconstruction_loss(batch_images, output_images)
        decoder_loss = self.decoder_loss(output_reconstructed_classes)
        discriminator_loss = self.discriminator_loss(output_real_classes, output_reconstructed_classes)

        # Encoder update: KL term, weighted reconstruction and adversarial losses,
        # plus an L1 penalty on the log-variances.
        self.encoder_scheduler.optimizer.zero_grad()
        loss = latent_loss + self.args.beta * reconstruction_loss + self.args.gamma * decoder_loss \
            + self.args.eta * torch.sum(torch.abs(output_logvar))
        loss.backward(retain_graph=True)
        self.encoder_scheduler.optimizer.step()

        # Decoder update: reconstruction and adversarial losses only.
        self.decoder_scheduler.optimizer.zero_grad()
        loss = self.args.beta * reconstruction_loss + self.args.gamma * decoder_loss
        loss.backward(retain_graph=True)
        self.decoder_scheduler.optimizer.step()

        # Classifier (discriminator) update.
        self.classifier_scheduler.optimizer.zero_grad()
        loss = self.args.gamma * discriminator_loss
        loss.backward()
        self.classifier_scheduler.optimizer.step()

        reconstruction_error = self.reconstruction_error(batch_images, output_images)

        iteration = epoch * num_batches + b + 1
        self.train_statistics = numpy.vstack((
            self.train_statistics,
            numpy.array([
                iteration,
                iteration * self.args.batch_size,
                min(num_batches, iteration),
                min(num_batches, iteration) * self.args.batch_size,
                reconstruction_loss.item(),
                reconstruction_error.item(),
                latent_loss.item(),
                torch.mean(output_mu).item(),
                torch.var(output_mu).item(),
                torch.mean(output_logvar).item(),
                decoder_loss.item(),
                discriminator_loss.item(),
                torch.mean(torch.abs(list(self.encoder.parameters())[0].grad)).item(),
                torch.mean(torch.abs(list(self.decoder.parameters())[0].grad)).item(),
                torch.mean(torch.abs(list(self.classifier.parameters())[0].grad)).item(),
            ])
        ))

        skip = 10
        if b % skip == skip // 2:
            log('[Training] %d | %d: %g (%g) %g (%g, %g, %g)' % (
                epoch,
                b,
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 4]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 5]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 6]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 7]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 8]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 9]),
            ))
            log('[Training] %d | %d: %g %g (%g, %g, %g)' % (
                epoch,
                b,
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 10]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 11]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 12]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 13]),
                numpy.mean(self.train_statistics[max(0, iteration - skip):iteration, 14]),
            ))
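# The loop above calls self.reparameterize, which is defined elsewhere in the
# repository. For reference, a minimal sketch of the standard VAE
# reparameterization trick it presumably implements; the free function below is
# an illustration, not the repository's actual method:
def reparameterize(mu, logvar):
    """
    Sample z = mu + sigma * eps with eps ~ N(0, I); the sampling step stays
    differentiable with respect to mu and logvar.
    """
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return mu + eps * std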
def attack(self):
    """
    Attack the model on the test set.
    """

    assert self.model is not None
    assert self.model.training is False
    assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples has to match'

    concatenate_axis = -1
    if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
        self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
        if self.test_images.shape[3] > 1:
            assert len(self.original_perturbations.shape) == 5
        else:
            assert len(self.original_perturbations.shape) == 4
        log('[Attack] read %s' % self.args.perturbations_file)

        self.original_success = utils.read_hdf5(self.args.success_file)
        log('[Attack] read %s' % self.args.success_file)

        assert self.original_perturbations.shape[0] == self.original_success.shape[0]
        assert self.original_perturbations.shape[1] == self.original_success.shape[1]
        assert self.original_perturbations.shape[2] == self.test_images.shape[1]
        assert self.original_perturbations.shape[3] == self.test_images.shape[2]

        if self.original_perturbations.shape[1] >= self.args.max_samples and self.original_perturbations.shape[0] >= self.args.max_attempts:
            log('[Attack] found %d attempts, %d samples, requested no more' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
            return
        elif self.original_perturbations.shape[0] == self.args.max_attempts or self.original_perturbations.shape[1] == self.args.max_samples:
            if self.original_perturbations.shape[0] == self.args.max_attempts:
                self.test_images = self.test_images[self.original_perturbations.shape[1]:]
                self.test_codes = self.test_codes[self.original_perturbations.shape[1]:]
                self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
                concatenate_axis = 1
                log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_samples))
            elif self.original_perturbations.shape[1] == self.args.max_samples:
                self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
                concatenate_axis = 0
                log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_attempts))

    # Can't squeeze here: color images keep their channel dimension.
    if self.test_images.shape[3] > 1:
        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2], self.test_images.shape[3]))
    else:
        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2]))
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)
    else:
        batch_size = 1

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples / batch_size))

    for i in range(num_batches):
        if i * batch_size == self.args.max_samples:
            break

        i_start = i * batch_size
        i_end = min((i + 1) * batch_size, self.args.max_samples)

        batch_images = common.torch.as_variable(self.test_images[i_start:i_end], self.args.use_gpu)
        batch_classes = common.torch.as_variable(numpy.array(self.test_codes[i_start:i_end]), self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)

        t = 0
        while t < self.args.max_attempts:
            attack = self.setup_attack(batch_images, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            assert not numpy.any(perturbations != perturbations), perturbations

            # Note that we save the perturbed image, not only the perturbation!
            self.perturbations[t][i_start:i_end] = numpy.squeeze(numpy.transpose(perturbations + batch_images.cpu().numpy(), (0, 2, 3, 1)))
            self.success[t][i_start:i_end] = success
            # IMPORTANT: the adversarial examples do not take into account whether
            # the classifier was actually correct on the clean sample to start with.
            t += 1

        log('[Attack] %d: completed' % i)

    if concatenate_axis >= 0:
        if self.perturbations.shape[0] == self.args.max_attempts:
            self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
            self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
            log('[Attack] concatenated')

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)
    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)
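# The files written above can be consumed independently of this class. A minimal
# sketch of reading them back (argument names assumed to match the writer); an
# entry of success appears to hold the iteration at which the attack succeeded,
# with -1 marking failure:
perturbations = utils.read_hdf5(args.perturbations_file)  # attempts x samples x H x W (x C)
success = utils.read_hdf5(args.success_file)  # attempts x samples
raw_success_rate = numpy.sum(success >= 0) / float(success.size)
log('[Attack] raw success rate: %g' % raw_success_rate)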
def compute_statistics(self):
    """
    Compute statistics based on distances.
    """

    num_attempts = self.perturbations.shape[0]

    perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    perturbations = perturbations.reshape((perturbations.shape[0] * perturbations.shape[1], perturbations.shape[2]))
    success = numpy.swapaxes(self.success, 0, 1)
    success = success.reshape((success.shape[0] * success.shape[1]))

    probabilities = numpy.swapaxes(self.probabilities, 0, 1)
    probabilities = probabilities.reshape((probabilities.shape[0] * probabilities.shape[1], -1))
    confidences = numpy.max(probabilities, 1)

    perturbation_probabilities = self.test_probabilities[:self.success.shape[1]]
    perturbation_probabilities = numpy.repeat(perturbation_probabilities, num_attempts, axis=0)
    perturbation_confidences = numpy.max(perturbation_probabilities, 1)

    probability_ratios = confidences / perturbation_confidences

    raw_overall_success = success >= 0
    log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

    # For off-manifold attacks this should not happen, but safe is safe.
    if not numpy.any(raw_overall_success):
        for key in ['raw_success', 'raw_iteration', 'raw_roc', 'raw_confidence_weighted_success', 'raw_confidence', 'raw_ratios']:
            self.results[key] = 0
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
        log('[Testing] no successful attacks found, no plots')
        return

    # We compute some simple statistics:
    # - raw success rate: fraction of successful attacks without considering epsilon
    # - corrected success rate: fraction of successful attacks within the epsilon-ball
    # - raw average perturbation: average distance to original samples (for successful attacks)
    # - corrected average perturbation: average distance to original samples for perturbations
    #   within the epsilon-ball (for successful attacks).
    # These statistics can also be computed per class,
    # and they are computed with respect to three norms.

    if self.args.plot_directory and utils.display():
        iterations = success[raw_overall_success]
        x = numpy.arange(numpy.max(iterations) + 1)
        y = numpy.bincount(iterations)
        plot_file = os.path.join(self.args.plot_directory, 'iterations')
        plot.bar(plot_file, x, y, title='Distribution of Iterations of Successful Attacks', xlabel='Number of Iterations', ylabel='Count')
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'probabilities')
        plot.histogram(plot_file, confidences[raw_overall_success], 50)
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'probability_ratios')
        plot.histogram(plot_file, probability_ratios, 50)
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'test_probabilities')
        plot.histogram(plot_file, self.test_probabilities[numpy.arange(self.test_probabilities.shape[0]), self.test_codes], 50)
        log('[Testing] wrote %s' % plot_file)

    # ROC AUC for separating adversarial confidences from clean test confidences.
    y_true = numpy.concatenate((numpy.zeros(confidences.shape[0]), numpy.ones(perturbation_confidences.shape[0])))
    y_score = numpy.concatenate((confidences, perturbation_confidences))
    roc_auc_score = sklearn.metrics.roc_auc_score(y_true, y_score)

    self.results['raw_roc'] = roc_auc_score
    self.results['raw_confidence_weighted_success'] = numpy.sum(confidences[raw_overall_success]) / numpy.sum(perturbation_confidences)
    self.results['raw_confidence'] = numpy.mean(probabilities[raw_overall_success])
    self.results['raw_ratios'] = numpy.mean(probability_ratios[raw_overall_success])
    self.results['raw_success'] = numpy.sum(raw_overall_success) / success.shape[0]
    self.results['raw_iteration'] = numpy.average(success[raw_overall_success])

    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
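# The ROC AUC above uses label 1 for clean test confidences and label 0 for
# adversarial confidences, so a score near 1 means the two are separable by
# confidence alone. A toy check of that convention (values made up for
# illustration):
clean_confidences = numpy.array([0.90, 0.95, 0.85])
adversarial_confidences = numpy.array([0.60, 0.70, 0.80])
y_true = numpy.concatenate((numpy.zeros(3), numpy.ones(3)))
y_score = numpy.concatenate((adversarial_confidences, clean_confidences))
print(sklearn.metrics.roc_auc_score(y_true, y_score))  # 1.0: perfectly separable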
def attack(self):
    """
    Attack the model over the latent/theta parameters on the test set.
    """

    assert self.model is not None
    assert self.model.classifier.training is False

    concatenate_axis = -1
    if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
        self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
        assert len(self.original_perturbations.shape) == 3
        log('[Attack] read %s' % self.args.perturbations_file)

        self.original_success = utils.read_hdf5(self.args.success_file)
        log('[Attack] read %s' % self.args.success_file)

        assert self.original_perturbations.shape[0] == self.original_success.shape[0]
        assert self.original_perturbations.shape[1] == self.original_success.shape[1]

        # Nothing to do if at least as many attempts and samples exist as requested.
        if self.original_perturbations.shape[1] >= self.args.max_samples and self.original_perturbations.shape[0] >= self.args.max_attempts:
            log('[Attack] found %d attempts, %d samples, requested no more' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
            return
        elif self.original_perturbations.shape[0] == self.args.max_attempts or self.original_perturbations.shape[1] == self.args.max_samples:
            if self.original_perturbations.shape[0] == self.args.max_attempts:
                self.test_images = self.test_images[self.original_perturbations.shape[1]:]
                self.test_codes = self.test_codes[self.original_perturbations.shape[1]:]
                self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
                concatenate_axis = 1
                log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_samples))
            elif self.original_perturbations.shape[1] == self.args.max_samples:
                self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
                concatenate_axis = 0
                log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_attempts))

    self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.args.N_theta))
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)
    else:
        batch_size = 1

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples / batch_size))

    for i in range(num_batches):
        if i * batch_size == self.args.max_samples:
            break

        i_start = i * batch_size
        i_end = min((i + 1) * batch_size, self.args.max_samples)

        batch_classes = common.torch.as_variable(self.test_codes[i_start:i_end], self.args.use_gpu)
        batch_theta = common.torch.as_variable(numpy.zeros((i_end - i_start, self.args.N_theta), dtype=numpy.float32), self.args.use_gpu)
        if self.args.N_theta > 4:
            batch_theta[:, 4] = 1

        batch_images = common.torch.as_variable(self.test_images[i_start:i_end], self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)
        self.model.decoder.set_image(batch_images)

        # Debugging code to visually check the decoder reconstruction:
        # output_images = self.model.decoder.forward(batch_theta)
        # error = torch.sum(torch.abs(output_images - batch_images)).item()
        # print(error)
        # from matplotlib import pyplot
        # output_images = numpy.squeeze(numpy.transpose(output_images.cpu().detach().numpy(), (0, 2, 3, 1)))
        # pyplot.imshow(output_images[0])
        # pyplot.show()

        t = 0
        while t < self.args.max_attempts:
            attack = self.setup_attack(batch_theta, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            assert not numpy.any(perturbations != perturbations), perturbations

            # Note that we save the perturbed theta, not only the perturbation!
            # The reshape is a workaround for a one-dimensional latent space.
            perturbations = perturbations.reshape(batch_theta.size())
            self.perturbations[t][i_start:i_end] = perturbations + batch_theta.cpu().detach().numpy()
            self.success[t][i_start:i_end] = success
            t += 1

        log('[Attack] %d: completed' % i)

    if concatenate_axis >= 0:
        if self.perturbations.shape[0] == self.args.max_attempts:
            self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
            self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
            log('[Attack] concatenated')

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)
    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)
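# The resume logic above grows an existing perturbations file along either the
# attempts axis (axis 0) or the samples axis (axis 1). A toy illustration of the
# attempts case (shapes made up; N_theta = 6):
old = numpy.zeros((2, 10, 6))  # 2 finished attempts, 10 samples
new = numpy.zeros((3, 10, 6))  # 3 additional attempts on the same samples
combined = numpy.concatenate((old, new), axis=0)  # concatenate_axis = 0
assert combined.shape == (5, 10, 6)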