def compute_ppca(self):
    """
    Fit one global PPCA model and measure how well it reconstructs the data.

    The model uses ``self.args.n_pca`` components and is fitted on the first
    ``self.args.n_fit`` flattened nearest neighbor images. Flattened test
    images, perturbations and their difference (stored under ``'true'``) are
    projected through the model and back.

    Writes to ``self.distances`` the per-sample mean squared reconstruction
    error and to ``self.angles`` the result of ``common.numpy.angles`` between
    samples and reconstructions, converted with ``numpy.rad2deg``. Both use the
    keys ``'test'``, ``'perturbation'`` and ``'true'`` and are restricted to
    samples with ``self.success >= 0`` that are set in ``self.accuracy``.
    """

    def mean_squared_error(reconstruction, original):
        # Per-sample (row-wise) mean of the squared reconstruction residual.
        residual = reconstruction - original
        return numpy.average(numpy.multiply(residual, residual), axis=1)

    success = numpy.logical_and(self.success >= 0, self.accuracy)
    log('[Detection] %d valid attacked samples' % numpy.sum(success))

    nearest_neighbor_images = self.nearest_neighbor_images.reshape(self.nearest_neighbor_images.shape[0], -1)
    nearest_neighbor_images = nearest_neighbor_images[:self.args.n_fit]

    perturbations = self.perturbations.reshape(self.perturbations.shape[0], -1)
    test_images = self.test_images.reshape(self.test_images.shape[0], -1)
    pure_perturbations = perturbations - test_images

    ppca = PPCA(n_components=self.args.n_pca)
    ppca.fit(nearest_neighbor_images)
    log('[Experiment] computed PPCA on nearest neighbor images')

    reconstructed_test_images = ppca.inverse_transform(ppca.transform(test_images))
    reconstructed_perturbations = ppca.inverse_transform(ppca.transform(perturbations))
    reconstructed_pure_perturbations = ppca.inverse_transform(ppca.transform(pure_perturbations))

    distances = {
        'test': mean_squared_error(reconstructed_test_images, test_images),
        'perturbation': mean_squared_error(reconstructed_perturbations, perturbations),
        'true': mean_squared_error(reconstructed_pure_perturbations, pure_perturbations),
    }
    # Argument order of common.numpy.angles is kept exactly as before.
    angles = {
        'test': numpy.rad2deg(common.numpy.angles(test_images.T, reconstructed_test_images.T)),
        'perturbation': numpy.rad2deg(common.numpy.angles(reconstructed_perturbations.T, perturbations.T)),
        'true': numpy.rad2deg(common.numpy.angles(reconstructed_pure_perturbations.T, pure_perturbations.T)),
    }

    # Restrict distances AND angles to valid attacked samples. Previously only
    # the distances were filtered, so the two no longer lined up per sample
    # (compute_nn filters both).
    for key in ('test', 'perturbation', 'true'):
        self.distances[key] = distances[key][success]
        self.angles[key] = angles[key][success]
def compute_normalized_ppca(self):
    """
    Fit PPCA on L2-normalized data and measure reconstruction errors and angles.

    Every flattened nearest neighbor image, test image, perturbation and
    difference ``perturbation - test image`` (key ``'true'``) is scaled to unit
    L2 norm. Only samples with ``self.success >= 0``, set in ``self.accuracy``
    and with a difference norm above ``1e-4`` are kept. The PPCA model uses
    ``self.args.n_pca`` components and is fitted on the first
    ``self.args.n_fit`` normalized nearest neighbor images.

    Writes per-sample mean squared reconstruction errors to ``self.distances``
    and ``numpy.rad2deg(common.numpy.angles(...))`` to ``self.angles`` under the
    keys ``'test'``, ``'perturbation'`` and ``'true'``.

    Raises ``AssertionError`` if normalization produced NaN values (e.g. a
    zero-norm row).
    """

    nearest_neighbor_images = self.nearest_neighbor_images.reshape(self.nearest_neighbor_images.shape[0], -1)
    nearest_neighbor_images = nearest_neighbor_images[:self.args.n_fit]

    perturbations = self.perturbations.reshape(self.perturbations.shape[0], -1)
    test_images = self.test_images.reshape(self.test_images.shape[0], -1)
    pure_perturbations = perturbations - test_images

    nearest_neighbor_images_norms = numpy.linalg.norm(nearest_neighbor_images, ord=2, axis=1)
    perturbations_norms = numpy.linalg.norm(perturbations, ord=2, axis=1)
    test_images_norms = numpy.linalg.norm(test_images, ord=2, axis=1)
    pure_perturbations_norms = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)

    # Besides the usual validity check, drop (near-)zero perturbations: they
    # cannot be normalized.
    success = numpy.logical_and(numpy.logical_and(self.success >= 0, self.accuracy), pure_perturbations_norms > 1e-4)
    log('[Detection] %d valid attacked samples' % numpy.sum(success))

    # Normalize rows out of place. nearest_neighbor_images is a reshape/slice
    # view of self.nearest_neighbor_images, so the former in-place `/=`
    # silently rescaled the data stored on the instance.
    nearest_neighbor_images = nearest_neighbor_images / nearest_neighbor_images_norms.reshape(-1, 1)
    perturbations = perturbations[success] / perturbations_norms[success].reshape(-1, 1)
    test_images = test_images[success] / test_images_norms[success].reshape(-1, 1)
    pure_perturbations = pure_perturbations[success] / pure_perturbations_norms[success].reshape(-1, 1)

    # NaN is the only value that differs from itself; a zero norm yields 0/0.
    assert not numpy.any(nearest_neighbor_images != nearest_neighbor_images)
    assert not numpy.any(perturbations != perturbations)
    assert not numpy.any(test_images != test_images)
    assert not numpy.any(pure_perturbations != pure_perturbations)

    ppca = PPCA(n_components=self.args.n_pca)
    ppca.fit(nearest_neighbor_images)
    log('[Experiment] computed PPCA on nearest neighbor images')

    reconstructed_test_images = ppca.inverse_transform(ppca.transform(test_images))
    reconstructed_perturbations = ppca.inverse_transform(ppca.transform(perturbations))
    reconstructed_pure_perturbations = ppca.inverse_transform(ppca.transform(pure_perturbations))

    #self.probabilities['test'] = ppca.marginal(test_images)
    #self.probabilities['perturbation'] = ppca.marginal(perturbations)
    #self.probabilities['true'] = ppca.marginal(pure_perturbations)

    def mean_squared_error(reconstruction, original):
        # Per-sample (row-wise) mean of the squared reconstruction residual.
        residual = reconstruction - original
        return numpy.average(numpy.multiply(residual, residual), axis=1)

    self.distances['test'] = mean_squared_error(reconstructed_test_images, test_images)
    self.distances['perturbation'] = mean_squared_error(reconstructed_perturbations, perturbations)
    self.distances['true'] = mean_squared_error(reconstructed_pure_perturbations, pure_perturbations)

    # Inputs were already restricted to `success`, so no filtering is needed here.
    self.angles['test'] = numpy.rad2deg(common.numpy.angles(test_images.T, reconstructed_test_images.T))
    self.angles['perturbation'] = numpy.rad2deg(common.numpy.angles(reconstructed_perturbations.T, perturbations.T))
    self.angles['true'] = numpy.rad2deg(common.numpy.angles(reconstructed_pure_perturbations.T, pure_perturbations.T))
def compute_local_pca(self):
    """
    Fit a local PCA per attacked sample and measure reconstruction errors and angles.

    For every sample with ``self.success >= 0`` that is set in
    ``self.accuracy``, an ``IncrementalPCA`` with ``self.args.n_pca``
    components is fitted on the sample's nearest neighbors (looked up via
    ``self.compute_nearest_neighbors`` among the first ``self.args.n_fit``
    flattened nearest neighbor images) plus the test image itself.

    Writes per-sample mean squared reconstruction errors to ``self.distances``
    and ``numpy.rad2deg(common.numpy.angles(...))`` to ``self.angles`` under the
    keys ``'test'``, ``'perturbation'`` and ``'true'``, restricted to the valid
    samples.
    """

    def mean_squared_error(reconstruction, original):
        # reconstruction has a single row; return its mean squared residual as
        # a scalar instead of a size-1 array (assigning size-1 arrays to a
        # scalar slot is deprecated in recent NumPy).
        residual = reconstruction - original
        return numpy.average(numpy.multiply(residual, residual), axis=1)[0]

    success = numpy.logical_and(self.success >= 0, self.accuracy)
    log('[Detection] %d valid attacked samples' % numpy.sum(success))

    nearest_neighbor_images = self.nearest_neighbor_images.reshape(self.nearest_neighbor_images.shape[0], -1)
    nearest_neighbor_images = nearest_neighbor_images[:self.args.n_fit]

    perturbations = self.perturbations.reshape(self.perturbations.shape[0], -1)
    test_images = self.test_images.reshape(self.test_images.shape[0], -1)
    pure_perturbations = perturbations - test_images

    nearest_neighbors_indices = self.compute_nearest_neighbors(perturbations)

    # Placeholders for all samples; entries of invalid samples stay zero and
    # are dropped at the end.
    for key in ('true', 'test', 'perturbation'):
        self.distances[key] = numpy.zeros((success.shape[0]))
        self.angles[key] = numpy.zeros((success.shape[0]))

    for n in range(pure_perturbations.shape[0]):
        if not success[n]:
            continue

        nearest_neighbors = nearest_neighbor_images[nearest_neighbors_indices[n, :]]
        nearest_neighbors = numpy.concatenate((nearest_neighbors, test_images[n].reshape(1, -1)), axis=0)

        pca = sklearn.decomposition.IncrementalPCA(n_components=self.args.n_pca)
        pca.fit(nearest_neighbors)

        reconstructed_test_images = pca.inverse_transform(pca.transform(test_images[n].reshape(1, -1)))
        reconstructed_perturbations = pca.inverse_transform(pca.transform(perturbations[n].reshape(1, -1)))
        reconstructed_pure_perturbations = pca.inverse_transform(pca.transform(pure_perturbations[n].reshape(1, -1)))

        self.distances['test'][n] = mean_squared_error(reconstructed_test_images, test_images[n])
        self.distances['perturbation'][n] = mean_squared_error(reconstructed_perturbations, perturbations[n])
        self.distances['true'][n] = mean_squared_error(reconstructed_pure_perturbations, pure_perturbations[n])

        self.angles['test'][n] = numpy.rad2deg(common.numpy.angles(reconstructed_test_images.T, test_images[n].T))
        self.angles['perturbation'][n] = numpy.rad2deg(common.numpy.angles(reconstructed_perturbations.T, perturbations[n].T))
        self.angles['true'][n] = numpy.rad2deg(common.numpy.angles(reconstructed_pure_perturbations.T, pure_perturbations[n].T))

        log('[Detection] %d: true distance=%g angle=%g' % (n, self.distances['true'][n], self.angles['true'][n]))
        log('[Detection] %d: perturbation distance=%g angle=%g' % (n, self.distances['perturbation'][n], self.angles['perturbation'][n]))
        log('[Detection] %d: test distance=%g angle=%g' % (n, self.distances['test'][n], self.angles['test'][n]))

    # Drop the zero placeholders of invalid samples from distances AND angles.
    # Previously only the distances were filtered, leaving placeholder zeros in
    # the angles (compute_nn filters both).
    for key in ('test', 'perturbation', 'true'):
        self.distances[key] = self.distances[key][success]
        self.angles[key] = self.angles[key][success]
def compute_statistics(self):
    """
    Compute confidence-based statistics over all attack attempts.

    ``self.success`` and ``self.probabilities`` have their first two axes
    swapped and merged into one attempt axis. The number of attempts per sample
    is ``self.perturbations.shape[0]``. A flattened attempt counts as
    successful when its ``success`` entry is ``>= 0``; that entry is also used
    as the iteration count.

    Writes to ``self.results``: ``raw_success``, ``raw_iteration``,
    ``raw_roc``, ``raw_confidence_weighted_success``, ``raw_confidence`` and
    ``raw_ratios`` (all ``0`` if no attempt succeeded). If
    ``self.args.results_file`` is set, the results are pickled there. If
    ``self.args.plot_directory`` is set and ``utils.display()`` is true,
    histograms and a bar plot are written.
    """

    num_attempts = self.perturbations.shape[0]

    success = numpy.swapaxes(self.success, 0, 1)
    success = success.reshape((success.shape[0] * success.shape[1]))

    probabilities = numpy.swapaxes(self.probabilities, 0, 1)
    probabilities = probabilities.reshape((probabilities.shape[0] * probabilities.shape[1], -1))
    confidences = numpy.max(probabilities, 1)

    # Reference confidences come from the test probabilities, repeated once
    # per attempt so they line up with the flattened attempts.
    perturbation_probabilities = self.test_probabilities[:self.success.shape[1]]
    perturbation_probabilities = numpy.repeat(perturbation_probabilities, num_attempts, axis=0)
    perturbation_confidences = numpy.max(perturbation_probabilities, 1)

    probability_ratios = confidences / perturbation_confidences

    raw_overall_success = success >= 0
    log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

    # For off-manifold attacks this should not happen, but safe is safe.
    if not numpy.any(raw_overall_success):
        for key in [
            'raw_success',
            'raw_iteration',
            'raw_roc',
            'raw_confidence_weighted_success',
            'raw_confidence',
            'raw_ratios',
        ]:
            self.results[key] = 0
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
        log('[Testing] no successful attacks found, no plots')
        return

    #
    # We compute some simple statistics:
    # - raw success rate: fraction of successful attack without considering epsilon
    # - corrected success rate: fraction of successful attacks within epsilon-ball
    # - raw average perturbation: average distance to original samples (for successful attacks)
    # - corrected average perturbation: average distance to original samples for perturbations
    #   within epsilon-ball (for successful attacks).
    # These statistics can also be computed per class.
    # And these statistics are computed with respect to three norms.

    if self.args.plot_directory and utils.display():
        iterations = success[raw_overall_success]
        x = numpy.arange(numpy.max(iterations) + 1)
        y = numpy.bincount(iterations)
        plot_file = os.path.join(self.args.plot_directory, 'iterations')
        plot.bar(plot_file, x, y,
                 title='Distribution of Iterations of Successful Attacks',
                 xlabel='Number of Iterations', ylabel='Count')
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'probabilities')
        plot.histogram(plot_file, confidences[raw_overall_success], 50)
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'probability_ratios')
        plot.histogram(plot_file, probability_ratios, 50)
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'test_probabilities')
        plot.histogram(
            plot_file,
            self.test_probabilities[numpy.arange(self.test_probabilities.shape[0]), self.test_codes],
            50)
        log('[Testing] wrote %s' % plot_file)

    # ROC AUC for separating attack confidences (label 0) from reference
    # confidences (label 1) by score.
    y_true = numpy.concatenate((numpy.zeros(confidences.shape[0]), numpy.ones(perturbation_confidences.shape[0])))
    y_score = numpy.concatenate((confidences, perturbation_confidences))
    roc_auc_score = sklearn.metrics.roc_auc_score(y_true, y_score)

    self.results['raw_roc'] = roc_auc_score
    self.results['raw_confidence_weighted_success'] = (
        numpy.sum(confidences[raw_overall_success]) / numpy.sum(perturbation_confidences))
    # Mean confidence (max probability) of successful attacks. This used to
    # average the complete probability rows instead of the per-sample maximum,
    # which does not measure confidence.
    self.results['raw_confidence'] = numpy.mean(confidences[raw_overall_success])
    self.results['raw_ratios'] = numpy.mean(probability_ratios[raw_overall_success])
    self.results['raw_success'] = numpy.sum(raw_overall_success) / success.shape[0]
    self.results['raw_iteration'] = numpy.average(success[raw_overall_success])

    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
def compute_statistics(self):
    """
    Compute success and distance statistics for every norm in ``self.norms``.

    Only attacks with ``self.success >= 0`` on samples set in
    ``self.accuracy`` are considered. For the ``n``-th norm,
    ``self.results[n]`` receives:

    - ``raw_success``: valid successful attacks divided by the number of
      samples set in ``self.accuracy``
    - ``raw_iteration``: average ``self.success`` value of these attacks
    - ``raw_average``: average norm of ``self.perturbations`` minus the
      reference perturbation
    - ``raw_image``: average norm of ``self.test_images - self.perturbation_images``
    - ``raw_class_success``, ``raw_class_average``, ``raw_class_image``: the
      same quantities per class (by ``self.perturbation_codes``), as arrays of
      length ``self.N_class``

    If there is no valid successful attack, all entries are zero. If
    ``self.args.results_file`` is set, the results are pickled there. Plots are
    written when ``self.args.plot_directory`` is set and ``utils.display()`` is
    true.
    """

    # That's the basis for all computation as we only want to consider successful attacks
    # on test samples that were correctly classified.
    raw_overall_success = numpy.logical_and(self.success >= 0, self.accuracy)

    # Important check, for on-manifold attack this will happen if the manifold is small and the model very accurate!
    if not numpy.any(raw_overall_success):
        for n in range(len(self.norms)):
            for key in ['raw_success', 'raw_iteration', 'raw_average', 'raw_image']:
                self.results[n][key] = 0
            for key in ['raw_class_success', 'raw_class_average', 'raw_class_image']:
                self.results[n][key] = numpy.zeros((self.N_class))
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
        return

    #
    # Compute nearest neighbor statistics in image space.
    #

    if self.args.plot_directory and self.args.plot_manifolds and utils.display():
        log('[Testing] computing nearest neighbor ...')
        nearest_neighbors_indices = self.compute_nearest_neighbors(self.perturbation_images[raw_overall_success])
        pure_perturbations = self.test_images[raw_overall_success] - self.perturbation_images[raw_overall_success]
        pure_perturbations_norm = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)
        for k in range(10):
            direction = self.perturbation_images[raw_overall_success] - self.train_images[nearest_neighbors_indices[:, k]]
            direction_norm = numpy.linalg.norm(direction, ord=2, axis=1)
            dot_products = numpy.einsum('ij,ij->i', direction, pure_perturbations)
            dot_product_norms = numpy.multiply(pure_perturbations_norm, direction_norm)
            # Skip (near-)zero vectors, for which the angle is undefined.
            non_degenerate = dot_product_norms > 10**-8
            dot_products, dot_product_norms = dot_products[non_degenerate], dot_product_norms[non_degenerate]
            dot_products /= dot_product_norms
            # Rounding can push a cosine slightly outside [-1, 1], which would
            # turn into NaN in arccos and suppress the whole plot below.
            dot_products = numpy.degrees(numpy.arccos(numpy.clip(dot_products, -1, 1)))

            # matplotlib's histogram plots give weird error if there are NaN values, so simple check:
            if dot_products.shape[0] > 0 and not numpy.any(dot_products != dot_products):
                plot_file = os.path.join(self.args.plot_directory, 'dot_products_nn%d' % k)
                plot.histogram(plot_file, dot_products, 100, xmin=numpy.min(dot_products), xmax=numpy.max(dot_products),
                               title='Dot Products Between Adversarial Perturbations and Direction to Nearest Neighbor %d' % k,
                               xlabel='Dot Product', ylabel='Count')
                log('[Testing] wrote %s' % plot_file)

    #
    # We compute some simple statistics:
    # - raw success rate: fraction of successful attack without considering epsilon
    # - corrected success rate: fraction of successful attacks within epsilon-ball
    # - raw average perturbation: average distance to original samples (for successful attacks)
    # - corrected average perturbation: average distance to original samples for perturbations
    #   within epsilon-ball (for successful attacks).
    # These statistics can also be computed per class.
    # And these statistics are computed with respect to three norms.

    if self.args.plot_directory and utils.display():
        iterations = self.success[raw_overall_success]
        x = numpy.arange(numpy.max(iterations) + 1)
        y = numpy.bincount(iterations)
        plot_file = os.path.join(self.args.plot_directory, 'iterations')
        plot.bar(plot_file, x, y,
                 title='Distribution of Iterations of Successful Attacks',
                 xlabel='Number of Iterations', ylabel='Count')
        log('[Testing] wrote %s' % plot_file)

    # Distances are measured relative to this reference; its column 4 is set
    # to one when there are more than four columns configured via N_theta.
    reference_perturbations = numpy.zeros(self.perturbations.shape)
    if self.args.N_theta > 4:
        reference_perturbations[:, 4] = 1

    # Independent of the norm, so computed once.
    N_accuracy = numpy.sum(self.accuracy)
    debug_accuracy = N_accuracy / self.accuracy.shape[0]
    debug_attack_fraction = numpy.sum(raw_overall_success) / numpy.sum(self.success >= 0)
    debug_test_fraction = numpy.sum(raw_overall_success) / N_accuracy

    for n, norm in enumerate(self.norms):
        delta = numpy.linalg.norm(self.perturbations - reference_perturbations, norm, axis=1)
        image_delta = numpy.linalg.norm(self.test_images - self.perturbation_images, norm, axis=1)

        if self.args.plot_directory and utils.display():
            plot_file = os.path.join(self.args.plot_directory, 'distances_l%g' % norm)
            plot.histogram(plot_file, delta[raw_overall_success], 50,
                           title='Distribution of $L_{%g}$ Distances of Successful Attacks' % norm,
                           xlabel='Distance', ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

        log('[Testing] attacked mode accuracy: %g' % debug_accuracy)
        log('[Testing] only %g of successful attacks are valid' % debug_attack_fraction)
        log('[Testing] only %g of correct samples are successfully attacked' % debug_test_fraction)

        # At least one valid attack exists here (see the early return above),
        # so the averages are well-defined.
        self.results[n]['raw_success'] = numpy.sum(raw_overall_success) / N_accuracy
        self.results[n]['raw_iteration'] = numpy.average(self.success[raw_overall_success])
        self.results[n]['raw_average'] = numpy.average(delta[raw_overall_success])
        self.results[n]['raw_image'] = numpy.average(image_delta[raw_overall_success])

        self.results[n]['raw_class_success'] = numpy.zeros((self.N_class))
        self.results[n]['raw_class_average'] = numpy.zeros((self.N_class))
        self.results[n]['raw_class_image'] = numpy.zeros((self.N_class))

        for c in range(self.N_class):
            class_members = self.perturbation_codes == c
            N_samples = numpy.sum(self.accuracy[class_members].astype(int))
            if N_samples <= 0:
                continue

            class_success = numpy.logical_and(raw_overall_success, class_members)
            self.results[n]['raw_class_success'][c] = numpy.sum(class_success) / N_samples

            # Both per-class averages need at least one successful attack in
            # the class. 'raw_class_image' used to be guarded by an array that
            # was never filled, so it always stayed zero.
            if numpy.any(class_success):
                self.results[n]['raw_class_average'][c] = numpy.average(delta[class_success])
                self.results[n]['raw_class_image'][c] = numpy.average(image_delta[class_success])

    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
def compute_nn(self, inclusive=False):
    """
    Measure distances and angles to the subspace spanned by nearest neighbors.

    For every sample with ``self.success >= 0`` that is set in
    ``self.accuracy``, the nearest neighbors of the flattened perturbation
    (from ``self.compute_nearest_neighbors``) are centered and used as a basis.
    Three vectors are projected onto it with
    ``common.numpy.project_orthogonal``: perturbation minus test image
    (``'true'``), perturbation minus center (``'perturbation'``) and test image
    minus center (``'test'``). The L2 norm of the residual goes to
    ``self.distances`` and ``numpy.rad2deg(common.numpy.angles(...))`` to
    ``self.angles``, both restricted to the valid samples.

    :param inclusive: if true, the test image is appended to the neighbors and
        used as the center instead of the neighbor average; the ``'test'``
        results are then set to zero
    """

    keys = ('true', 'test', 'perturbation')

    valid = numpy.logical_and(self.success >= 0, self.accuracy)
    log('[Detection] %d valid attacked samples' % numpy.sum(valid))

    neighbor_pool = self.nearest_neighbor_images.reshape(self.nearest_neighbor_images.shape[0], -1)
    flat_perturbations = self.perturbations.reshape(self.perturbations.shape[0], -1)
    flat_test_images = self.test_images.reshape(self.test_images.shape[0], -1)

    neighbor_indices = self.compute_nearest_neighbors(flat_perturbations)
    flat_differences = flat_perturbations - flat_test_images
    log('[Detection] computed nearest neighbors for perturbations')

    # One slot per sample; slots of invalid samples stay zero and are removed below.
    for key in keys:
        self.distances[key] = numpy.zeros(valid.shape[0])
        self.angles[key] = numpy.zeros(valid.shape[0])

    for n in range(flat_differences.shape[0]):
        if not valid[n]:
            continue

        neighbors = neighbor_pool[neighbor_indices[n, :]]
        if inclusive:
            neighbors = numpy.concatenate((neighbors, flat_test_images[n].reshape(1, -1)), axis=0)
            center = flat_test_images[n]
        else:
            center = numpy.average(neighbors, axis=0)

        basis = neighbors - center
        centered_perturbation = flat_perturbations[n] - center
        centered_test_image = flat_test_images[n] - center

        if inclusive:
            # The appended test image is the last basis row.
            assert numpy.allclose(centered_test_image, basis[-1])

        # Columns: 0 = true, 1 = perturbation, 2 = test.
        vectors = numpy.stack((flat_differences[n], centered_perturbation, centered_test_image), axis=1)
        projections = common.numpy.project_orthogonal(basis.T, vectors)
        assert vectors.shape[0] == projections.shape[0]
        assert vectors.shape[1] == projections.shape[1]

        angles = numpy.rad2deg(common.numpy.angles(vectors, projections))
        distances = numpy.linalg.norm(vectors - projections, ord=2, axis=0)
        assert distances.shape[0] == 3
        assert angles.shape[0] == 3

        for column, key in enumerate(('true', 'perturbation', 'test')):
            self.distances[key][n] = distances[column]
            self.angles[key][n] = angles[column]

        log('[Detection] %d: true distance=%g angle=%g' % (n, self.distances['true'][n], self.angles['true'][n]))
        log('[Detection] %d: perturbation distance=%g angle=%g' % (n, self.distances['perturbation'][n], self.angles['perturbation'][n]))
        log('[Detection] %d: test distance=%g angle=%g' % (n, self.distances['test'][n], self.angles['test'][n]))

    for key in keys:
        self.distances[key] = self.distances[key][valid]
        self.angles[key] = self.angles[key][valid]

    if inclusive:
        self.distances['test'][:] = 0
        self.angles['test'][:] = 0