def test(self):
    """ Test the model. """

    assert self.encoder is not None and self.decoder is not None

    self.encoder.eval()
    log('[Testing] set encoder to eval')
    self.decoder.eval()
    log('[Testing] set decoder to eval')

    if self.args.train_theta_file or self.args.train_reconstruction_file:
        self.test_train()
    self.test_test()

    if self.args.random_file:
        self.test_random()
    if self.args.interpolation_file:
        self.test_interpolation()

    self.results = {
        'reconstruction_error': self.reconstruction_error,
        'code_mean': self.code_mean,
        'code_var': self.code_var,
    }
    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
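# Hedged usage sketch: reading back the results dictionary written above.
# This assumes utils.write_pickle writes a standard pickle file; the path
# 'results.pkl' below is purely illustrative, not a fixed convention.
import pickle

with open('results.pkl', 'rb') as f:
    results = pickle.load(f)
print('reconstruction error: %g' % results['reconstruction_error'])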
def loop(self):
    """ Main loop for training and testing, saving ... """

    while self.epoch < self.args.epochs:
        log('[Training] %s' % self.scheduler.report())

        # Note that we test first, to also get the error of the untrained model.
        testing = elapsed(functools.partial(self.test))
        training = elapsed(functools.partial(self.train))
        log('[Training] %gs training, %gs testing' % (training, testing))

        if self.args.early_stopping:
            validation = elapsed(functools.partial(self.validate))
            log('[Training] %gs validation' % validation)

        # Save a model checkpoint after each epoch and remove the previous one.
        utils.remove(self.args.state_file + '.%d' % (self.epoch - 1))
        State.checkpoint(self.model, self.scheduler.optimizer, self.epoch, self.args.state_file + '.%d' % self.epoch)
        log('[Training] %d: checkpoint' % self.epoch)
        torch.cuda.empty_cache()  # necessary?

        # Save statistics and plots.
        if self.args.training_file:
            utils.write_hdf5(self.args.training_file, self.train_statistics)
            log('[Training] %d: wrote %s' % (self.epoch, self.args.training_file))
        if self.args.testing_file:
            utils.write_hdf5(self.args.testing_file, self.test_statistics)
            log('[Training] %d: wrote %s' % (self.epoch, self.args.testing_file))

        if utils.display():
            self.plot()

        self.epoch += 1  # !

    # Final testing.
    testing = elapsed(functools.partial(self.test))
    log('[Training] %gs testing' % testing)

    # Save the final model checkpoint and remove the last per-epoch one.
    utils.remove(self.args.state_file + '.%d' % (self.epoch - 1))
    State.checkpoint(self.model, self.scheduler.optimizer, self.epoch, self.args.state_file)
    log('[Training] %d: checkpoint' % self.epoch)

    self.results = {
        'training_statistics': self.train_statistics,
        'testing_statistics': self.test_statistics,
    }
    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Training] wrote %s' % self.args.results_file)
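# Hedged sketch of the elapsed() helper used above; the log statements suggest
# it returns a wall-clock runtime in seconds, but the actual implementation in
# this codebase may differ.
import time

def elapsed(function):
    """ Run a zero-argument callable and return its runtime in seconds. """
    start = time.time()
    function()
    return time.time() - start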
def compute_statistics(self):
    """ Compute statistics based on distances. """

    num_attempts = self.perturbations.shape[0]

    # Flatten the (attempts, samples, ...) arrays sample-first.
    perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    perturbations = perturbations.reshape((perturbations.shape[0] * perturbations.shape[1], perturbations.shape[2]))
    success = numpy.swapaxes(self.success, 0, 1)
    success = success.reshape((success.shape[0] * success.shape[1]))

    probabilities = numpy.swapaxes(self.probabilities, 0, 1)
    probabilities = probabilities.reshape((probabilities.shape[0] * probabilities.shape[1], -1))
    confidences = numpy.max(probabilities, 1)

    perturbation_probabilities = self.test_probabilities[:self.success.shape[1]]
    perturbation_probabilities = numpy.repeat(perturbation_probabilities, num_attempts, axis=0)
    perturbation_confidences = numpy.max(perturbation_probabilities, 1)

    probability_ratios = confidences / perturbation_confidences

    raw_overall_success = success >= 0
    log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

    # For off-manifold attacks this should not happen, but better safe than sorry.
    if not numpy.any(raw_overall_success):
        for type in ['raw_success', 'raw_iteration', 'raw_roc', 'raw_confidence_weighted_success', 'raw_confidence', 'raw_ratios']:
            self.results[type] = 0
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
        log('[Testing] no successful attacks found, no plots')
        return

    #
    # We compute some simple statistics:
    # - raw success rate: fraction of successful attacks without considering epsilon
    # - corrected success rate: fraction of successful attacks within the epsilon-ball
    # - raw average perturbation: average distance to original samples (for successful attacks)
    # - corrected average perturbation: average distance to original samples for perturbations
    #   within the epsilon-ball (for successful attacks).
    # These statistics can also be computed per class,
    # and they are computed with respect to three norms.
    #
    if self.args.plot_directory and utils.display():
        iterations = success[raw_overall_success]
        x = numpy.arange(numpy.max(iterations) + 1)
        y = numpy.bincount(iterations)
        plot_file = os.path.join(self.args.plot_directory, 'iterations')
        plot.bar(plot_file, x, y, title='Distribution of Iterations of Successful Attacks', xlabel='Number of Iterations', ylabel='Count')
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'probabilities')
        plot.histogram(plot_file, confidences[raw_overall_success], 50)
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'probability_ratios')
        plot.histogram(plot_file, probability_ratios, 50)
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'test_probabilities')
        plot.histogram(plot_file, self.test_probabilities[numpy.arange(self.test_probabilities.shape[0]), self.test_codes], 50)
        log('[Testing] wrote %s' % plot_file)

    # ROC AUC for separating adversarial confidences (label 0) from the
    # confidences on the corresponding clean test samples (label 1).
    y_true = numpy.concatenate((numpy.zeros(confidences.shape[0]), numpy.ones(perturbation_confidences.shape[0])))
    y_score = numpy.concatenate((confidences, perturbation_confidences))
    roc_auc_score = sklearn.metrics.roc_auc_score(y_true, y_score)

    self.results['raw_roc'] = roc_auc_score
    self.results['raw_confidence_weighted_success'] = numpy.sum(confidences[raw_overall_success]) / numpy.sum(perturbation_confidences)
    self.results['raw_confidence'] = numpy.mean(probabilities[raw_overall_success])
    self.results['raw_ratios'] = numpy.mean(probability_ratios[raw_overall_success])
    self.results['raw_success'] = numpy.sum(raw_overall_success) / success.shape[0]
    self.results['raw_iteration'] = numpy.average(success[raw_overall_success])

    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
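# Hedged toy example of the ROC AUC measure computed above: adversarial
# confidences get label 0 and clean test confidences get label 1, so an AUC
# near 1 means confidences alone separate the two sets. The numbers below are
# purely illustrative.
import numpy
import sklearn.metrics

adversarial_confidences = numpy.array([0.55, 0.60, 0.70])
clean_confidences = numpy.array([0.90, 0.95, 0.85])

y_true = numpy.concatenate((numpy.zeros(3), numpy.ones(3)))
y_score = numpy.concatenate((adversarial_confidences, clean_confidences))
print(sklearn.metrics.roc_auc_score(y_true, y_score))  # 1.0 on this toy data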
def compute_statistics(self):
    """ Compute statistics based on distances. """

    # This is the basis for all computations, as we only want to consider
    # successful attacks on test samples that were correctly classified.
    raw_overall_success = numpy.logical_and(self.success >= 0, self.accuracy)

    # Important check: for on-manifold attacks this will happen if the manifold
    # is small and the model very accurate!
    if not numpy.any(raw_overall_success):
        for n in range(len(self.norms)):
            for type in ['raw_success', 'raw_iteration', 'raw_average', 'raw_image']:
                self.results[n][type] = 0
            for type in ['raw_class_success', 'raw_class_average', 'raw_class_image']:
                self.results[n][type] = numpy.zeros((self.N_class))
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
        return

    #
    # Compute nearest neighbor statistics in image space.
    #
    if self.args.plot_directory and self.args.plot_manifolds and utils.display():
        log('[Testing] computing nearest neighbor ...')
        nearest_neighbors_indices = self.compute_nearest_neighbors(self.perturbation_images[raw_overall_success])
        pure_perturbations = self.test_images[raw_overall_success] - self.perturbation_images[raw_overall_success]
        pure_perturbations_norm = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)
        for k in range(10):
            # Angle between the perturbation and the direction to the k-th nearest
            # neighbor; a numerically safer variant is sketched after this function.
            direction = self.perturbation_images[raw_overall_success] - self.train_images[nearest_neighbors_indices[:, k]]
            direction_norm = numpy.linalg.norm(direction, ord=2, axis=1)
            dot_products = numpy.einsum('ij,ij->i', direction, pure_perturbations)
            dot_product_norms = numpy.multiply(pure_perturbations_norm, direction_norm)
            dot_products, dot_product_norms = dot_products[dot_product_norms > 10**-8], dot_product_norms[dot_product_norms > 10**-8]
            dot_products /= dot_product_norms
            dot_products = numpy.degrees(numpy.arccos(dot_products))

            # matplotlib's histogram plots give a weird error if there are NaN values, so a simple check:
            if dot_products.shape[0] > 0 and not numpy.any(dot_products != dot_products):
                plot_file = os.path.join(self.args.plot_directory, 'dot_products_nn%d' % k)
                plot.histogram(plot_file, dot_products, 100, xmin=numpy.min(dot_products), xmax=numpy.max(dot_products), title='Dot Products Between Adversarial Perturbations and Direction to Nearest Neighbor %d' % k, xlabel='Dot Product', ylabel='Count')
                log('[Testing] wrote %s' % plot_file)

    #
    # We compute some simple statistics:
    # - raw success rate: fraction of successful attacks without considering epsilon
    # - corrected success rate: fraction of successful attacks within the epsilon-ball
    # - raw average perturbation: average distance to original samples (for successful attacks)
    # - corrected average perturbation: average distance to original samples for perturbations
    #   within the epsilon-ball (for successful attacks).
    # These statistics can also be computed per class,
    # and they are computed with respect to three norms.
    #
    if self.args.plot_directory and utils.display():
        iterations = self.success[raw_overall_success]
        x = numpy.arange(numpy.max(iterations) + 1)
        y = numpy.bincount(iterations)
        plot_file = os.path.join(self.args.plot_directory, 'iterations')
        plot.bar(plot_file, x, y, title='Distribution of Iterations of Successful Attacks', xlabel='Number of Iterations', ylabel='Count')
        log('[Testing] wrote %s' % plot_file)

    reference_perturbations = numpy.zeros(self.perturbations.shape)
    if self.args.N_theta > 4:
        reference_perturbations[:, 4] = 1

    for n in range(len(self.norms)):
        norm = self.norms[n]
        delta = numpy.linalg.norm(self.perturbations - reference_perturbations, norm, axis=1)
        image_delta = numpy.linalg.norm(self.test_images - self.perturbation_images, norm, axis=1)

        if self.args.plot_directory and utils.display():
            plot_file = os.path.join(self.args.plot_directory, 'distances_l%g' % norm)
            plot.histogram(plot_file, delta[raw_overall_success], 50, title='Distribution of $L_{%g}$ Distances of Successful Attacks' % norm, xlabel='Distance', ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

        debug_accuracy = numpy.sum(self.accuracy) / self.accuracy.shape[0]
        debug_attack_fraction = numpy.sum(raw_overall_success) / numpy.sum(self.success >= 0)
        debug_test_fraction = numpy.sum(raw_overall_success) / numpy.sum(self.accuracy)
        log('[Testing] attacked model accuracy: %g' % debug_accuracy)
        log('[Testing] only %g of successful attacks are valid' % debug_attack_fraction)
        log('[Testing] only %g of correct samples are successfully attacked' % debug_test_fraction)

        N_accuracy = numpy.sum(self.accuracy)
        self.results[n]['raw_success'] = numpy.sum(raw_overall_success) / N_accuracy
        self.results[n]['raw_iteration'] = numpy.average(self.success[raw_overall_success])
        self.results[n]['raw_average'] = numpy.average(delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0
        self.results[n]['raw_image'] = numpy.average(image_delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0

        raw_class_success = numpy.zeros((self.N_class, self.perturbation_codes.shape[0]), bool)
        self.results[n]['raw_class_success'] = numpy.zeros((self.N_class))
        self.results[n]['raw_class_average'] = numpy.zeros((self.N_class))
        self.results[n]['raw_class_image'] = numpy.zeros((self.N_class))

        for c in range(self.N_class):
            N_samples = numpy.sum(self.accuracy[self.perturbation_codes == c].astype(int))
            if N_samples <= 0:
                continue
            raw_class_success[c] = numpy.logical_and(raw_overall_success, self.perturbation_codes == c)
            self.results[n]['raw_class_success'][c] = numpy.sum(raw_class_success[c]) / N_samples
            if numpy.any(raw_class_success[c]):
                self.results[n]['raw_class_average'][c] = numpy.average(delta[raw_class_success[c].astype(bool)])
                self.results[n]['raw_class_image'][c] = numpy.average(image_delta[raw_class_success[c].astype(bool)])

    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
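# Hedged sketch of a numerically safer variant of the angle computation in
# compute_statistics() above: clipping cosines to [-1, 1] before arccos
# removes the need for the NaN check. Illustrative only; this is not the code
# path actually used above.
import numpy

def angles_degrees(a, b, eps=1e-8):
    """ Row-wise angles, in degrees, between the rows of a and b. """
    norms = numpy.linalg.norm(a, axis=1) * numpy.linalg.norm(b, axis=1)
    keep = norms > eps  # drop degenerate (near-zero) vectors
    cosines = numpy.einsum('ij,ij->i', a[keep], b[keep]) / norms[keep]
    return numpy.degrees(numpy.arccos(numpy.clip(cosines, -1, 1)))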
def loop(self):
    """ Main loop for training and testing, saving ... """

    auto_encoder_params = {
        'lr': self.args.base_lr,
        'lr_decay': self.args.base_lr_decay,
        'lr_min': 0.000000001,
        'weight_decay': self.args.weight_decay,
    }
    classifier_params = {
        'lr': self.args.base_lr,
        'lr_decay': self.args.base_lr_decay,
        'lr_min': 0.000000001,
        'weight_decay': self.args.weight_decay,
    }

    e = 0
    if os.path.exists(self.args.encoder_file) and os.path.exists(self.args.decoder_file) and os.path.exists(self.args.classifier_file):
        # Resume from existing checkpoints.
        state = State.load(self.args.encoder_file)
        log('[Training] loaded %s' % self.args.encoder_file)
        self.encoder.load_state_dict(state.model)
        log('[Training] loaded encoder')
        if self.args.use_gpu and not cuda.is_cuda(self.encoder):
            self.encoder = self.encoder.cuda()
        optimizer = torch.optim.Adam(list(self.encoder.parameters()), auto_encoder_params['lr'])
        optimizer.load_state_dict(state.optimizer)
        self.encoder_scheduler = ADAMScheduler(optimizer, **auto_encoder_params)

        state = State.load(self.args.decoder_file)
        log('[Training] loaded %s' % self.args.decoder_file)
        self.decoder.load_state_dict(state.model)
        log('[Training] loaded decoder')
        if self.args.use_gpu and not cuda.is_cuda(self.decoder):
            self.decoder = self.decoder.cuda()
        optimizer = torch.optim.Adam(list(self.decoder.parameters()), auto_encoder_params['lr'])
        optimizer.load_state_dict(state.optimizer)
        self.decoder_scheduler = ADAMScheduler(optimizer, **auto_encoder_params)

        state = State.load(self.args.classifier_file)
        log('[Training] loaded %s' % self.args.classifier_file)
        self.classifier.load_state_dict(state.model)
        log('[Training] loaded classifier')
        if self.args.use_gpu and not cuda.is_cuda(self.classifier):
            self.classifier = self.classifier.cuda()
        optimizer = torch.optim.Adam(list(self.classifier.parameters()), classifier_params['lr'])
        optimizer.load_state_dict(state.optimizer)
        self.classifier_scheduler = ADAMScheduler(optimizer, **classifier_params)

        e = state.epoch + 1
        self.encoder_scheduler.update(e)
        self.decoder_scheduler.update(e)
        self.classifier_scheduler.update(e)
    else:
        if self.args.use_gpu and not cuda.is_cuda(self.encoder):
            self.encoder = self.encoder.cuda()
        if self.args.use_gpu and not cuda.is_cuda(self.decoder):
            self.decoder = self.decoder.cuda()
        if self.args.use_gpu and not cuda.is_cuda(self.classifier):
            self.classifier = self.classifier.cuda()

        self.encoder_scheduler = ADAMScheduler(list(self.encoder.parameters()), **auto_encoder_params)
        self.encoder_scheduler.initialize()  # !
        self.decoder_scheduler = ADAMScheduler(list(self.decoder.parameters()), **auto_encoder_params)
        self.decoder_scheduler.initialize()  # !
        self.classifier_scheduler = ADAMScheduler(list(self.classifier.parameters()), **classifier_params)
        self.classifier_scheduler.initialize()  # !
    log('[Training] model needs %gMiB' % (cuda.estimate_size(self.encoder) / (1024 * 1024)))

    while e < self.args.epochs:
        log('[Training] %s' % self.encoder_scheduler.report())
        log('[Training] %s' % self.decoder_scheduler.report())
        log('[Training] %s' % self.classifier_scheduler.report())

        # Test first, to also get the error of the untrained model.
        testing = elapsed(functools.partial(self.test, e))
        training = elapsed(functools.partial(self.train, e))
        log('[Training] %gs training, %gs testing' % (training, testing))

        # Save checkpoints for all three models after each epoch.
        #utils.remove(self.args.encoder_file + '.%d' % (e - 1))
        #utils.remove(self.args.decoder_file + '.%d' % (e - 1))
        #utils.remove(self.args.classifier_file + '.%d' % (e - 1))
        State.checkpoint(self.encoder, self.encoder_scheduler.optimizer, e, self.args.encoder_file + '.%d' % e)
        State.checkpoint(self.decoder, self.decoder_scheduler.optimizer, e, self.args.decoder_file + '.%d' % e)
        State.checkpoint(self.classifier, self.classifier_scheduler.optimizer, e, self.args.classifier_file + '.%d' % e)
        log('[Training] %d: checkpoint' % e)
        torch.cuda.empty_cache()  # necessary?

        # Save statistics and plots.
        if self.args.training_file:
            utils.write_hdf5(self.args.training_file, self.train_statistics)
            log('[Training] %d: wrote %s' % (e, self.args.training_file))
        if self.args.testing_file:
            utils.write_hdf5(self.args.testing_file, self.test_statistics)
            log('[Training] %d: wrote %s' % (e, self.args.testing_file))

        #if utils.display():
        #    self.plot()

        e += 1  # !

    # Final testing and checkpoints.
    testing = elapsed(functools.partial(self.test, e))
    log('[Training] %gs testing' % testing)

    #utils.remove(self.args.encoder_file + '.%d' % (e - 1))
    #utils.remove(self.args.decoder_file + '.%d' % (e - 1))
    #utils.remove(self.args.classifier_file + '.%d' % (e - 1))
    State.checkpoint(self.encoder, self.encoder_scheduler.optimizer, e, self.args.encoder_file)
    State.checkpoint(self.decoder, self.decoder_scheduler.optimizer, e, self.args.decoder_file)
    State.checkpoint(self.classifier, self.classifier_scheduler.optimizer, e, self.args.classifier_file)

    self.results = {
        'training_statistics': self.train_statistics,
        'testing_statistics': self.test_statistics,
    }
    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Training] wrote %s' % self.args.results_file)
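# Hedged sketch of what State.checkpoint()/State.load(), used above, are
# assumed to do based on their call sites: bundle model weights, optimizer
# state, and the epoch into one file. The actual State class in this codebase
# may differ.
import torch

class State:
    """ Bundle of model weights, optimizer state, and epoch. """

    def __init__(self, model, optimizer, epoch):
        self.model = model
        self.optimizer = optimizer
        self.epoch = epoch

    @classmethod
    def checkpoint(cls, model, optimizer, epoch, path):
        torch.save({'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'epoch': epoch}, path)

    @classmethod
    def load(cls, path):
        data = torch.load(path)
        return cls(data['model'], data['optimizer'], data['epoch'])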
def compute_latent_statistics(self):
    """ Compute latent statistics. """

    N_class = numpy.max(self.test_codes) + 1
    num_attempts = self.perturbations.shape[0]

    perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    perturbations = perturbations.reshape((perturbations.shape[0] * perturbations.shape[1], perturbations.shape[2]))
    success = numpy.swapaxes(self.success, 0, 1)
    success = success.reshape((success.shape[0] * success.shape[1]))
    accuracy = numpy.repeat(self.accuracy, num_attempts, axis=0)

    # Raw success is the basis for all statistics, as we only consider attacks
    # that are successful and where the classifier was originally correct.
    raw_overall_success = numpy.logical_and(success >= 0, accuracy)

    # For off-manifold attacks this should not happen, but better safe than sorry.
    if not numpy.any(raw_overall_success):
        for n in range(len(self.norms)):
            for type in ['raw_success', 'raw_iteration', 'raw_average', 'raw_latent']:
                self.results[n][type] = 0
            for type in ['raw_class_success', 'raw_class_average', 'raw_class_latent']:
                self.results[n][type] = numpy.zeros((N_class))
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
        log('[Testing] no successful attacks found, no plots')
        return

    perturbation_images = numpy.repeat(self.perturbation_images, num_attempts, axis=0)
    perturbation_codes = numpy.repeat(self.perturbation_codes, num_attempts, axis=0)

    #
    # Compute nearest neighbors for perturbations and test images,
    # to backproject them into the latent space.
    # Also compute the dot product between perturbations and a local
    # plane approximation based on the three nearest neighbors.
    # (A hedged sketch of compute_nearest_neighbors is given after this function.)
    #
    log('[Testing] computing nearest neighbor ...')
    nearest_neighbors_indices = self.compute_nearest_neighbors(perturbation_images)
    nearest_neighbors = self.train_theta[nearest_neighbors_indices[:, 0]]
    perturbation_nearest_neighbor_indices = self.compute_nearest_neighbors(perturbations)
    perturbation_nearest_neighbor = self.train_theta[perturbation_nearest_neighbor_indices[:, 0]]

    # Compute statistics over the perturbation with respect to the plane
    # defined by the three nearest neighbors of the corresponding test sample.
    if self.args.plot_directory and self.args.plot_manifolds and utils.display():
        pure_perturbations = perturbations[raw_overall_success] - perturbation_images[raw_overall_success]
        pure_perturbations_norm = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)
        for k in range(10):
            direction = perturbation_images[raw_overall_success] - self.train_images[nearest_neighbors_indices[:, k][raw_overall_success]]
            direction_norm = numpy.linalg.norm(direction, ord=2, axis=1)
            dot_products = numpy.einsum('ij,ij->i', direction, pure_perturbations)
            dot_product_norms = numpy.multiply(pure_perturbations_norm, direction_norm)
            dot_product_norms[dot_product_norms == 0] = 1
            dot_products /= dot_product_norms
            dot_products = numpy.degrees(numpy.arccos(dot_products))

            # matplotlib's histogram plots give a weird error if there are NaN values, so a simple check:
            if dot_products.shape[0] > 0 and not numpy.any(dot_products != dot_products):
                plot_file = os.path.join(self.args.plot_directory, 'dot_products_nn%d' % k)
                plot.histogram(plot_file, dot_products, 100, title='Dot Products Between Adversarial Perturbations and Direction to Nearest Neighbor %d' % k, xlabel='Dot Product (Between Normalized Vectors)', ylabel='Count')
                log('[Testing] wrote %s' % plot_file)

    #
    # We compute some simple statistics:
    # - raw success rate: fraction of successful attacks without considering epsilon
    # - corrected success rate: fraction of successful attacks within the epsilon-ball
    # - raw average perturbation: average distance to original samples (for successful attacks)
    # - corrected average perturbation: average distance to original samples for perturbations
    #   within the epsilon-ball (for successful attacks).
    # These statistics can also be computed per class,
    # and they are computed with respect to three norms.
    #
    if self.args.plot_directory and utils.display():
        iterations = success[raw_overall_success]
        x = numpy.arange(numpy.max(iterations) + 1)
        y = numpy.bincount(iterations)
        plot_file = os.path.join(self.args.plot_directory, 'iterations')
        plot.bar(plot_file, x, y, title='Distribution of Iterations of Successful Attacks', xlabel='Number of Iterations', ylabel='Count')
        log('[Testing] wrote %s' % plot_file)

    for n in range(len(self.norms)):
        norm = self.norms[n]
        delta = numpy.linalg.norm(perturbation_images - perturbations, norm, axis=1)
        latent_delta = numpy.linalg.norm(nearest_neighbors - perturbation_nearest_neighbor, norm, axis=1)

        if self.args.plot_directory and utils.display():
            plot_file = os.path.join(self.args.plot_directory, 'distances_l%g' % norm)
            plot.histogram(plot_file, delta[raw_overall_success], 50, title='Distribution of $L_{%g}$ Distances of Successful Attacks' % norm, xlabel='Distance', ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

        #debug_accuracy = numpy.sum(accuracy) / accuracy.shape[0]
        #debug_attack_fraction = numpy.sum(raw_overall_success) / numpy.sum(success >= 0)
        #debug_test_fraction = numpy.sum(raw_overall_success) / numpy.sum(accuracy)
        #log('[Testing] attacked model accuracy: %g' % debug_accuracy)
        #log('[Testing] only %g of successful attacks are valid' % debug_attack_fraction)
        #log('[Testing] only %g of correct samples are successfully attacked' % debug_test_fraction)

        N_accuracy = numpy.sum(accuracy)
        self.results[n]['raw_success'] = numpy.sum(raw_overall_success) / N_accuracy
        self.results[n]['raw_iteration'] = numpy.average(success[raw_overall_success])
        self.results[n]['raw_average'] = numpy.average(delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0
        self.results[n]['raw_latent'] = numpy.average(latent_delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0

        raw_class_success = numpy.zeros((N_class, perturbation_images.shape[0]), bool)
        self.results[n]['raw_class_success'] = numpy.zeros((N_class))
        self.results[n]['raw_class_average'] = numpy.zeros((N_class))
        self.results[n]['raw_class_latent'] = numpy.zeros((N_class))

        for c in range(N_class):
            N_samples = numpy.sum(numpy.logical_and(accuracy, perturbation_codes == c))
            if N_samples <= 0:
                continue
            raw_class_success[c] = numpy.logical_and(raw_overall_success, perturbation_codes == c)
            self.results[n]['raw_class_success'][c] = numpy.sum(raw_class_success[c]) / N_samples
            if numpy.any(raw_class_success[c]):
                self.results[n]['raw_class_average'][c] = numpy.average(delta[raw_class_success[c].astype(bool)])
                self.results[n]['raw_class_latent'][c] = numpy.average(latent_delta[raw_class_success[c].astype(bool)])

    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
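# Hedged sketch of what compute_nearest_neighbors(), used above, might look
# like; its behavior is inferred from usage (it returns an (N, k) array of
# indices into the training set) and the actual implementation may differ.
import sklearn.neighbors

def compute_nearest_neighbors(self, queries, k=10):
    """ Indices of the k nearest training images for each query. """
    nn = sklearn.neighbors.NearestNeighbors(n_neighbors=k)
    nn.fit(self.train_images.reshape(self.train_images.shape[0], -1))
    _, indices = nn.kneighbors(queries.reshape(queries.shape[0], -1))
    return indices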
def compute_statistics(self):
    """ Compute statistics based on distances. """

    N_class = numpy.max(self.test_codes) + 1
    num_attempts = self.perturbations.shape[0]

    # Flatten the (attempts, samples, ...) arrays sample-first; this pattern is
    # sketched on toy shapes after this function.
    perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    perturbations = perturbations.reshape((perturbations.shape[0] * perturbations.shape[1], perturbations.shape[2]))
    success = numpy.swapaxes(self.success, 0, 1)
    success = success.reshape((success.shape[0] * success.shape[1]))
    accuracy = numpy.repeat(self.accuracy, num_attempts, axis=0)

    # Raw success is the basis for all statistics, as we only consider attacks
    # that are successful and where the classifier was originally correct.
    raw_overall_success = numpy.logical_and(success >= 0, accuracy)
    log('[Testing] %d valid attacks' % numpy.sum(raw_overall_success))

    # For off-manifold attacks this should not happen, but better safe than sorry.
    if not numpy.any(raw_overall_success):
        for n in range(len(self.norms)):
            for type in ['raw_success', 'raw_iteration', 'raw_average', 'raw_latent']:
                self.results[n][type] = 0
            for type in ['raw_class_success', 'raw_class_average', 'raw_class_latent']:
                self.results[n][type] = numpy.zeros((N_class))
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
        log('[Testing] no successful attacks found, no plots')
        return

    perturbation_images = numpy.repeat(self.perturbation_images, num_attempts, axis=0)
    perturbation_codes = numpy.repeat(self.perturbation_codes, num_attempts, axis=0)

    #
    # We compute some simple statistics:
    # - raw success rate: fraction of successful attacks without considering epsilon
    # - corrected success rate: fraction of successful attacks within the epsilon-ball
    # - raw average perturbation: average distance to original samples (for successful attacks)
    # - corrected average perturbation: average distance to original samples for perturbations
    #   within the epsilon-ball (for successful attacks).
    # These statistics can also be computed per class,
    # and they are computed with respect to three norms.
    #
    if self.args.plot_directory and utils.display():
        iterations = success[raw_overall_success]
        x = numpy.arange(numpy.max(iterations) + 1)
        y = numpy.bincount(iterations)
        plot_file = os.path.join(self.args.plot_directory, 'iterations')
        plot.bar(plot_file, x, y, title='Distribution of Iterations of Successful Attacks', xlabel='Number of Iterations', ylabel='Count')
        log('[Testing] wrote %s' % plot_file)

    for n in range(len(self.norms)):
        norm = self.norms[n]
        delta = numpy.linalg.norm(perturbation_images - perturbations, norm, axis=1)

        if self.args.plot_directory and utils.display():
            plot_file = os.path.join(self.args.plot_directory, 'distances_l%g' % norm)
            plot.histogram(plot_file, delta[raw_overall_success], 50, title='Distribution of $L_{%g}$ Distances of Successful Attacks' % norm, xlabel='Distance', ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

        #debug_accuracy = numpy.sum(accuracy) / accuracy.shape[0]
        #debug_attack_fraction = numpy.sum(raw_overall_success) / numpy.sum(success >= 0)
        #debug_test_fraction = numpy.sum(raw_overall_success) / numpy.sum(accuracy)
        #log('[Testing] attacked model accuracy: %g' % debug_accuracy)
        #log('[Testing] only %g of successful attacks are valid' % debug_attack_fraction)
        #log('[Testing] only %g of correct samples are successfully attacked' % debug_test_fraction)

        N_accuracy = numpy.sum(accuracy)
        self.results[n]['raw_success'] = numpy.sum(raw_overall_success) / N_accuracy
        self.results[n]['raw_iteration'] = numpy.average(success[raw_overall_success])
        self.results[n]['raw_average'] = numpy.average(delta[raw_overall_success]) if numpy.any(raw_overall_success) else 0
        self.results[n]['raw_latent'] = 0

        raw_class_success = numpy.zeros((N_class, perturbation_images.shape[0]), bool)
        self.results[n]['raw_class_success'] = numpy.zeros((N_class))
        self.results[n]['raw_class_average'] = numpy.zeros((N_class))
        self.results[n]['raw_class_latent'] = numpy.zeros((N_class))

        for c in range(N_class):
            N_samples = numpy.sum(numpy.logical_and(accuracy, perturbation_codes == c))
            if N_samples <= 0:
                continue
            raw_class_success[c] = numpy.logical_and(raw_overall_success, perturbation_codes == c)
            self.results[n]['raw_class_success'][c] = numpy.sum(raw_class_success[c]) / N_samples
            if numpy.any(raw_class_success[c]):
                self.results[n]['raw_class_average'][c] = numpy.average(delta[raw_class_success[c].astype(bool)])

    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
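# Hedged sketch, on toy shapes, of the flattening used in the statistics
# functions above: an (attempts, samples, dim) array is reordered sample-first
# and reshaped to (samples * attempts, dim), so that
# numpy.repeat(per_sample_array, attempts, axis=0) aligns with it row by row.
# All names here are illustrative.
import numpy

attempts, samples, dim = 3, 5, 2
perturbations = numpy.random.randn(attempts, samples, dim)
flat = numpy.swapaxes(perturbations, 0, 1).reshape(samples * attempts, dim)

accuracy = numpy.random.rand(samples) > 0.5
aligned = numpy.repeat(accuracy, attempts, axis=0)

# Row i of flat corresponds to sample i // attempts and attempt i % attempts.
assert flat.shape[0] == aligned.shape[0]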
def test(self):
    """ Test the model. """

    assert self.model is not None
    assert self.model.training is False
    assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples has to match'

    self.loss = 0.
    self.error = 0.
    num_batches = int(math.ceil(self.test_images.shape[0] / self.args.batch_size))

    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, self.test_images.shape[0])

        batch_images = common.torch.as_variable(self.test_images[b_start:b_end], self.args.use_gpu)
        batch_classes = common.torch.as_variable(self.test_codes[b_start:b_end], self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)  # NHWC -> NCHW

        output_classes = self.model(batch_images)
        # Mean reduction is the default; the deprecated size_average=True is not needed.
        e = torch.nn.functional.cross_entropy(output_classes, batch_classes)
        self.loss += e.item()

        values, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
        errors = torch.abs(indices - batch_classes)
        e = torch.sum(errors > 0).float() / batch_classes.size()[0]
        self.error += e.item()

        self.accuracy = common.numpy.concatenate(self.accuracy, errors.data.cpu().numpy())

    self.loss /= num_batches
    self.error /= num_batches
    log('[Testing] test loss %g; test error %g' % (self.loss, self.error))

    self.accuracy = self.accuracy == 0
    if self.args.accuracy_file:
        utils.write_hdf5(self.args.accuracy_file, self.accuracy)
        log('[Testing] wrote %s' % self.args.accuracy_file)

    # Sanity check: the per-sample accuracy should be consistent with the averaged error.
    accuracy = numpy.sum(self.accuracy) / self.accuracy.shape[0]
    if numpy.abs(1 - accuracy - self.error) < 1e-4:
        log('[Testing] accuracy file is correct with accuracy %g' % accuracy)

    self.results = {
        'loss': self.loss,
        'error': self.error,
    }
    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
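# Hedged toy example of the error computation above: predictions are the
# argmax over class probabilities, and since softmax is monotone, taking the
# argmax of the raw logits yields the same predictions. Values are purely
# illustrative.
import torch

logits = torch.tensor([[2.0, 0.5], [0.1, 1.2], [3.0, -1.0]])
labels = torch.tensor([0, 0, 1])

predictions = torch.argmax(torch.nn.functional.softmax(logits, dim=1), dim=1)
error = (predictions != labels).float().mean().item()
print(error)  # 2/3 of this toy batch is misclassified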