def plot_manifolds(self):
    """
    Plot manifolds.
    """

    #
    # Plot all classes and adversarial examples in image space for individual classes as well as all classes.
    #

    fit = self.test_codes.shape[0] // 25
    test_images = self.test_images.reshape((self.test_images.shape[0], -1))
    manifold_visualization = plot.ManifoldVisualization('tsne', pre_pca=40)
    manifold_visualization.fit(test_images[:fit])
    log('[Testing] computed t-SNE on test images')

    for n in range(self.N_class):
        labels = ['Class %d' % (nn + 1) for nn in range(self.N_class)] + ['Adversarial Examples Class %d' % (n + 1)]
        data = numpy.concatenate((
            test_images[:fit],
            self.perturbation_images[self.perturbation_codes == n]
        ))
        classes = numpy.concatenate((
            self.test_codes[:fit],
            numpy.ones((self.perturbation_images[self.perturbation_codes == n].shape[0])) * 10,
        ))
        plot_file = os.path.join(self.args.plot_directory, 'perturbations_%d' % (n + 1))
        manifold_visualization.visualize(plot_file, data, classes, labels,
                                         title='Adversarial Examples Class %d\n(The adversarial examples are projected into the embedding using learned SVRs)' % (n + 1))
        log('[Testing] wrote %s' % plot_file)

    labels = ['Class %d' % (n + 1) for n in range(self.N_class)] + ['Adversarial Examples Class %d' % (n + 1) for n in range(self.N_class)]
    data = numpy.concatenate((
        test_images[:fit],
        self.perturbation_images
    ))
    classes = numpy.concatenate((
        self.test_codes[:fit],
        self.perturbation_codes + 10,
    ))
    plot_file = os.path.join(self.args.plot_directory, 'perturbations')
    manifold_visualization.visualize(plot_file, data, classes, labels,
                                     title='Adversarial Examples\n(The adversarial examples are projected into the embedding using learned SVRs)')
    log('[Testing] wrote %s' % plot_file)
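# A minimal sketch of the embedding step that plot.ManifoldVisualization('tsne', pre_pca=40)
# presumably performs: reduce the flattened images to 40 dimensions with PCA before running
# t-SNE. The helper embed_images and its parameters are hypothetical; only scikit-learn and
# numpy are assumed, not the project's own plotting API.
import numpy
import sklearn.decomposition
import sklearn.manifold


def embed_images(images, pre_pca=40, perplexity=30):
    """Return a 2D t-SNE embedding of flattened images, with a PCA pre-reduction."""
    flattened = images.reshape((images.shape[0], -1))
    reduced = sklearn.decomposition.PCA(n_components=pre_pca).fit_transform(flattened)
    return sklearn.manifold.TSNE(n_components=2, perplexity=perplexity).fit_transform(reduced)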
def compute_statistics(self):
    """
    Compute statistics based on distances.
    """

    num_attempts = self.perturbations.shape[0]

    perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    perturbations = perturbations.reshape((perturbations.shape[0] * perturbations.shape[1], perturbations.shape[2]))
    success = numpy.swapaxes(self.success, 0, 1)
    success = success.reshape((success.shape[0] * success.shape[1]))

    probabilities = numpy.swapaxes(self.probabilities, 0, 1)
    probabilities = probabilities.reshape((probabilities.shape[0] * probabilities.shape[1], -1))
    confidences = numpy.max(probabilities, 1)

    perturbation_probabilities = self.test_probabilities[:self.success.shape[1]]
    perturbation_probabilities = numpy.repeat(perturbation_probabilities, num_attempts, axis=0)
    perturbation_confidences = numpy.max(perturbation_probabilities, 1)

    probability_ratios = confidences / perturbation_confidences

    raw_overall_success = success >= 0
    log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

    # For off-manifold attacks this should not happen, but better safe than sorry.
    if not numpy.any(raw_overall_success):
        for key in ['raw_success', 'raw_iteration', 'raw_roc', 'raw_confidence_weighted_success', 'raw_confidence', 'raw_ratios']:
            self.results[key] = 0
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
        log('[Testing] no successful attacks found, no plots')
        return

    #
    # We compute some simple statistics:
    # - raw success rate: fraction of successful attacks without considering epsilon
    # - corrected success rate: fraction of successful attacks within the epsilon-ball
    # - raw average perturbation: average distance to the original samples (for successful attacks)
    # - corrected average perturbation: average distance to the original samples for perturbations
    #   within the epsilon-ball (for successful attacks).
    # These statistics can also be computed per class.
    # And these statistics are computed with respect to three norms.
    #
    if self.args.plot_directory and utils.display():
        iterations = success[raw_overall_success]
        x = numpy.arange(numpy.max(iterations) + 1)
        y = numpy.bincount(iterations)
        plot_file = os.path.join(self.args.plot_directory, 'iterations')
        plot.bar(plot_file, x, y, title='Distribution of Iterations of Successful Attacks', xlabel='Number of Iterations', ylabel='Count')
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'probabilities')
        plot.histogram(plot_file, confidences[raw_overall_success], 50)
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'probability_ratios')
        plot.histogram(plot_file, probability_ratios, 50)
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'test_probabilities')
        plot.histogram(plot_file, self.test_probabilities[numpy.arange(self.test_probabilities.shape[0]), self.test_codes], 50)
        log('[Testing] wrote %s' % plot_file)

    y_true = numpy.concatenate((numpy.zeros(confidences.shape[0]), numpy.ones(perturbation_confidences.shape[0])))
    y_score = numpy.concatenate((confidences, perturbation_confidences))
    roc_auc_score = sklearn.metrics.roc_auc_score(y_true, y_score)

    self.results['raw_roc'] = roc_auc_score
    self.results['raw_confidence_weighted_success'] = numpy.sum(confidences[raw_overall_success]) / numpy.sum(perturbation_confidences)
    self.results['raw_confidence'] = numpy.mean(probabilities[raw_overall_success])
    self.results['raw_ratios'] = numpy.mean(probability_ratios[raw_overall_success])
    self.results['raw_success'] = numpy.sum(raw_overall_success) / success.shape[0]
    self.results['raw_iteration'] = numpy.average(success[raw_overall_success])

    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
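# A minimal, self-contained sketch of the two headline statistics above, on toy arrays:
# the raw success rate (success stores the iteration count for successful attacks and -1
# otherwise) and the ROC AUC separating adversarial from clean confidences. The array
# names and values are made up for illustration; only numpy and scikit-learn are assumed.
import numpy
import sklearn.metrics

success = numpy.array([3, -1, 7, -1, 1])                        # iteration of success, -1 = failed
adversarial_confidences = numpy.array([0.9, 0.4, 0.8, 0.3, 0.7])
clean_confidences = numpy.array([0.99, 0.95, 0.97, 0.9, 0.98])

raw_success_rate = numpy.sum(success >= 0) / success.shape[0]   # 3/5 = 0.6
y_true = numpy.concatenate((numpy.zeros(adversarial_confidences.shape[0]), numpy.ones(clean_confidences.shape[0])))
y_score = numpy.concatenate((adversarial_confidences, clean_confidences))
roc_auc = sklearn.metrics.roc_auc_score(y_true, y_score)        # close to 1 if clean confidences dominate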
def attack(self):
    """
    Attack the model.
    """

    assert self.model is not None
    assert self.model.classifier.training is False

    concatenate_axis = -1
    if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
        self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
        assert len(self.original_perturbations.shape) == 3, self.original_perturbations.shape
        log('[Attack] read %s' % self.args.perturbations_file)

        self.original_success = utils.read_hdf5(self.args.success_file)
        log('[Attack] read %s' % self.args.success_file)

        assert self.original_perturbations.shape[0] == self.original_success.shape[0]
        assert self.original_perturbations.shape[1] == self.original_success.shape[1]
        assert self.original_perturbations.shape[2] == self.test_theta.shape[1]

        if self.original_perturbations.shape[1] >= self.args.max_samples and self.original_perturbations.shape[0] >= self.args.max_attempts:
            log('[Attack] found %d attempts, %d samples, requested no more' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
            return
        elif self.original_perturbations.shape[0] == self.args.max_attempts or self.original_perturbations.shape[1] == self.args.max_samples:
            if self.original_perturbations.shape[0] == self.args.max_attempts:
                self.test_theta = self.test_theta[self.original_perturbations.shape[1]:]
                self.test_fonts = self.test_fonts[self.original_perturbations.shape[1]:]
                self.test_classes = self.test_classes[self.original_perturbations.shape[1]:]
                self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
                concatenate_axis = 1
                log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_samples))
            elif self.original_perturbations.shape[1] == self.args.max_samples:
                self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
                concatenate_axis = 0
                log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_attempts))

    self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_theta.shape[1]))
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)
    else:
        batch_size = 1

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples / batch_size))

    for i in range(num_batches):
        if i * batch_size == self.args.max_samples:
            break

        i_start = i * batch_size
        i_end = min((i + 1) * batch_size, self.args.max_samples)

        batch_fonts = self.test_fonts[i_start: i_end]
        batch_classes = self.test_classes[i_start: i_end]
        batch_code = numpy.concatenate((common.numpy.one_hot(batch_fonts, self.N_font), common.numpy.one_hot(batch_classes, self.N_class)), axis=1).astype(numpy.float32)

        batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
        batch_inputs = common.torch.as_variable(self.test_theta[i_start: i_end], self.args.use_gpu)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

        t = 0
        # Fix the font/class code in the decoder so that the attack optimizes only over theta.
        self.model.decoder.set_code(batch_code)

        while t < self.args.max_attempts:
            attack = self.setup_attack(batch_inputs, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            assert not numpy.any(perturbations != perturbations), perturbations

            # Note that we save the perturbed theta, not only the perturbation!
            # The reshape is a hack for the case where a one-dimensional latent space is used.
            perturbations = perturbations.reshape(batch_inputs.size())
            self.perturbations[t][i_start: i_end] = perturbations + batch_inputs.cpu().numpy()
            self.success[t][i_start: i_end] = success
            t += 1

        log('[Attack] %d: completed' % i)

    if concatenate_axis >= 0:
        if self.perturbations.shape[0] == self.args.max_attempts:
            self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
            self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
            log('[Attack] concatenated')

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)
    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)
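# A minimal sketch of the resume logic above, on toy arrays: previously computed
# perturbations of shape (attempts, samples, dim) are extended either by more samples
# (axis 1) or by more attempts (axis 0), depending on which dimension is already complete.
# The array names and sizes are made up for illustration; only numpy is assumed.
import numpy

original = numpy.zeros((2, 100, 10))    # 2 attempts, 100 samples, 10-dimensional theta
more_samples = numpy.zeros((2, 50, 10))
more_attempts = numpy.zeros((3, 100, 10))

extended_by_samples = numpy.concatenate((original, more_samples), axis=1)    # (2, 150, 10)
extended_by_attempts = numpy.concatenate((original, more_attempts), axis=0)  # (5, 100, 10)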
def attack(self):
    """
    Attack the model.
    """

    assert self.model is not None
    assert self.model.training is False
    assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples has to match'

    concatenate_axis = -1
    if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
        self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
        if self.test_images.shape[3] > 1:
            assert len(self.original_perturbations.shape) == 5
        else:
            assert len(self.original_perturbations.shape) == 4
        log('[Attack] read %s' % self.args.perturbations_file)

        self.original_success = utils.read_hdf5(self.args.success_file)
        log('[Attack] read %s' % self.args.success_file)

        assert self.original_perturbations.shape[0] == self.original_success.shape[0]
        assert self.original_perturbations.shape[1] == self.original_success.shape[1]
        assert self.original_perturbations.shape[2] == self.test_images.shape[1]
        assert self.original_perturbations.shape[3] == self.test_images.shape[2]

        if self.original_perturbations.shape[1] >= self.args.max_samples and self.original_perturbations.shape[0] >= self.args.max_attempts:
            log('[Attack] found %d attempts, %d samples, requested no more' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
            return
        elif self.original_perturbations.shape[0] == self.args.max_attempts or self.original_perturbations.shape[1] == self.args.max_samples:
            if self.original_perturbations.shape[0] == self.args.max_attempts:
                self.test_images = self.test_images[self.original_perturbations.shape[1]:]
                self.test_codes = self.test_codes[self.original_perturbations.shape[1]:]
                self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
                concatenate_axis = 1
                log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_samples))
            elif self.original_perturbations.shape[1] == self.args.max_samples:
                self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
                concatenate_axis = 0
                log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_attempts))

    # Can't squeeze here; keep the channel dimension for multi-channel images.
    if self.test_images.shape[3] > 1:
        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2], self.test_images.shape[3]))
    else:
        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2]))
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)
    else:
        batch_size = 1

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples / batch_size))

    for i in range(num_batches):
        if i * batch_size == self.args.max_samples:
            break

        i_start = i * batch_size
        i_end = min((i + 1) * batch_size, self.args.max_samples)

        batch_images = common.torch.as_variable(self.test_images[i_start: i_end], self.args.use_gpu)
        batch_classes = common.torch.as_variable(numpy.array(self.test_codes[i_start: i_end]), self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)

        t = 0
        while t < self.args.max_attempts:
            attack = self.setup_attack(batch_images, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            assert not numpy.any(perturbations != perturbations), perturbations

            # Note that we save the perturbed image, not only the perturbation!
            self.perturbations[t][i_start: i_end] = numpy.squeeze(numpy.transpose(perturbations + batch_images.cpu().numpy(), (0, 2, 3, 1)))
            self.success[t][i_start: i_end] = success
            # IMPORTANT: The adversarial examples do not take into account whether the classifier
            # was correct on the clean sample to start with.
            t += 1

        log('[Attack] %d: completed' % i)

    if concatenate_axis >= 0:
        if self.perturbations.shape[0] == self.args.max_attempts:
            self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
            self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
            log('[Attack] concatenated')

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)
    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)
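# A minimal sketch of the layout round trip used above: images are stored channels-last
# (N, H, W, C), permuted to channels-first (N, C, H, W) for the network, and transposed
# back before being written to the perturbations array. Only numpy and torch are assumed;
# the toy shapes are made up.
import numpy
import torch

images_nhwc = numpy.random.rand(4, 28, 28, 1).astype(numpy.float32)
batch = torch.from_numpy(images_nhwc).permute(0, 3, 1, 2)                       # (4, 1, 28, 28) for the model
back_nhwc = numpy.transpose(batch.contiguous().numpy(), (0, 2, 3, 1))           # (4, 28, 28, 1) for storage
assert numpy.allclose(images_nhwc, back_nhwc)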
def attack(self):
    """
    Attack the model.
    """

    assert self.model is not None
    assert self.model.training is False

    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)
    else:
        batch_size = 1

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples / batch_size))

    # Can't squeeze here; keep the channel dimension for multi-channel images.
    if self.test_images.shape[3] > 1:
        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2], self.test_images.shape[3]))
    else:
        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_images.shape[1], self.test_images.shape[2]))
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1
    self.probabilities = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.N_class))

    for i in range(num_batches):
        if i * batch_size == self.args.max_samples:
            break

        i_start = i * batch_size
        i_end = min((i + 1) * batch_size, self.args.max_samples)

        # Random off-manifold images; shape[1:] is a tuple, so convert it to a list before concatenating.
        batch_images = numpy.random.randint(0, 255, size=[i_end - i_start] + list(self.test_images.shape[1:]))
        batch_images = common.torch.as_variable(batch_images, self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)
        batch_classes = common.torch.as_variable(numpy.random.randint(0, self.N_class - 1, size=(batch_images.size(0))), self.args.use_gpu)

        t = 0
        while t < self.args.max_attempts:
            attack = self.setup_attack(batch_images, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            assert not numpy.any(perturbations != perturbations), perturbations

            # Note that we save the perturbed image, not only the perturbation!
            self.perturbations[t][i_start:i_end] = numpy.squeeze(numpy.transpose(perturbations + batch_images.cpu().numpy(), (0, 2, 3, 1)))
            self.success[t][i_start:i_end] = success
            self.probabilities[t][i_start:i_end] = probabilities
            # IMPORTANT: The adversarial examples do not take into account whether the classifier
            # was correct on the original sample to start with.
            t += 1

        log('[Attack] %d: completed' % i)

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)
    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)
    utils.write_hdf5(self.args.probabilities_file, self.probabilities)
    log('[Attack] wrote %s' % self.args.probabilities_file)
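# A minimal sketch of the random off-manifold batch construction above. numpy shapes are
# tuples, so the leading batch dimension has to be concatenated as a list; the dtype/range
# conversion shown here (float32 in [0, 1]) is an assumption about what the classifier
# expects, not something fixed by the code above. The shapes are hypothetical.
import numpy

image_shape = (28, 28, 1)   # hypothetical H, W, C
batch_size = 8

batch_images = numpy.random.randint(0, 255, size=[batch_size] + list(image_shape))
batch_images = batch_images.astype(numpy.float32) / 255.0   # assumed input range
batch_classes = numpy.random.randint(0, 10, size=(batch_size,))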
def attack(self):
    """
    Attack the model.
    """

    assert self.model is not None
    assert self.model.classifier.training is False

    concatenate_axis = -1
    if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
        self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
        assert len(self.original_perturbations.shape) == 3
        log('[Attack] read %s' % self.args.perturbations_file)

        self.original_success = utils.read_hdf5(self.args.success_file)
        log('[Attack] read %s' % self.args.success_file)

        assert self.original_perturbations.shape[0] == self.original_success.shape[0]
        assert self.original_perturbations.shape[1] == self.original_success.shape[1]

        if self.original_perturbations.shape[1] >= self.args.max_samples and self.original_perturbations.shape[0] >= self.args.max_attempts:
            log('[Attack] found %d attempts, %d samples, requested no more' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
            return
        elif self.original_perturbations.shape[0] == self.args.max_attempts or self.original_perturbations.shape[1] == self.args.max_samples:
            if self.original_perturbations.shape[0] == self.args.max_attempts:
                self.test_images = self.test_images[self.original_perturbations.shape[1]:]
                self.test_codes = self.test_codes[self.original_perturbations.shape[1]:]
                self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
                concatenate_axis = 1
                log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_samples))
            elif self.original_perturbations.shape[1] == self.args.max_samples:
                self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
                concatenate_axis = 0
                log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (self.original_perturbations.shape[0], self.original_perturbations.shape[1], self.args.max_attempts))

    self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.args.N_theta))
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)
    else:
        batch_size = 1

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples / batch_size))

    for i in range(num_batches):
        if i * batch_size == self.args.max_samples:
            break

        i_start = i * batch_size
        i_end = min((i + 1) * batch_size, self.args.max_samples)

        batch_classes = common.torch.as_variable(self.test_codes[i_start:i_end], self.args.use_gpu)
        batch_theta = common.torch.as_variable(numpy.zeros((i_end - i_start, self.args.N_theta), dtype=numpy.float32), self.args.use_gpu)
        if self.args.N_theta > 4:
            batch_theta[:, 4] = 1
        batch_images = common.torch.as_variable(self.test_images[i_start:i_end], self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)

        # Fix the current images in the decoder so that the attack optimizes only over theta.
        self.model.decoder.set_image(batch_images)
        # Debugging code to check the decoder reconstruction:
        # output_images = self.model.decoder.forward(batch_theta)
        # error = torch.sum(torch.abs(output_images - batch_images))
        # error = error.item()
        # print(error)
        # from matplotlib import pyplot
        # output_images = numpy.squeeze(numpy.transpose(output_images.cpu().detach().numpy(), (0, 2, 3, 1)))
        # pyplot.imshow(output_images[0])
        # pyplot.show()

        t = 0
        while t < self.args.max_attempts:
            attack = self.setup_attack(batch_theta, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            assert not numpy.any(perturbations != perturbations), perturbations
            # Note that we save the perturbed theta, not only the perturbation!
            # The reshape is a hack for the case where a one-dimensional latent space is used.
            perturbations = perturbations.reshape(batch_theta.size())
            self.perturbations[t][i_start:i_end] = perturbations + batch_theta.cpu().detach().numpy()
            self.success[t][i_start:i_end] = success
            t += 1

        log('[Attack] %d: completed' % i)

    if concatenate_axis >= 0:
        if self.perturbations.shape[0] == self.args.max_attempts:
            self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
            self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
            log('[Attack] concatenated')

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)
    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)
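# A minimal sketch of the NaN check used after every attack run above: comparing an array
# to itself is False exactly at NaN entries, so the assertion is equivalent to checking
# numpy.isnan. The toy array below is made up for illustration; only numpy is assumed.
import numpy

perturbations = numpy.array([0.1, -0.2, numpy.nan])
assert numpy.any(perturbations != perturbations) == numpy.isnan(perturbations).any()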