def load_data(self):
    """
    Load the test images, labels and theta values used by the attack.

    Reads the HDF5 files named by ``self.args.test_images_file``,
    ``self.args.test_codes_file`` and ``self.args.test_theta_file`` and
    stores the results on ``self``, together with the number of classes and
    the element-wise minimum/maximum of the theta values along axis 0.
    ``self.args.max_samples`` is clamped to the number of theta rows; a
    negative value selects all of them.
    """

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
    log('[Attack] read %s' % self.args.test_images_file)

    # For color and gray images: make sure a fourth axis is always present.
    if len(self.test_images.shape) < 4:
        self.test_images = numpy.expand_dims(self.test_images, axis=3)

    # numpy.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases, so the builtin is used directly.
    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    self.test_codes = self.test_codes[:, self.args.label_index]
    log('[Attack] read %s' % self.args.test_codes_file)

    self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(numpy.float32)
    log('[Attack] read %s' % self.args.test_theta_file)

    # Labels are used as 0-based class indices, hence the +1.
    self.N_class = numpy.max(self.test_codes) + 1
    self.min_bound = numpy.min(self.test_theta, 0)
    self.max_bound = numpy.max(self.test_theta, 0)

    if self.args.max_samples < 0:
        self.args.max_samples = self.test_theta.shape[0]
    else:
        self.args.max_samples = min(self.args.max_samples, self.test_theta.shape[0])
def load_models(self):
    """
    Build the classifier, restore its weights and switch it to eval mode.

    The classifier is sized from ``self.test_codes`` (number of classes) and
    ``self.test_images`` (resolution), configured from ``self.args`` and
    initialised from the state stored in ``self.args.classifier_file``.

    Raises:
        AssertionError: if ``self.args.classifier_file`` does not exist.
    """

    self.N_class = numpy.max(self.test_codes) + 1

    units = [int(unit) for unit in self.args.network_units.split(',')]
    image_shape = self.test_images.shape
    log('[Testing] using %d input channels' % image_shape[3])

    # The classifier receives (axis 3, axis 1, axis 2) of the image array.
    classifier_options = dict(
        resolution=(image_shape[3], image_shape[1], image_shape[2]),
        architecture=self.args.network_architecture,
        activation=self.args.network_activation,
        batch_normalization=not self.args.network_no_batch_normalization,
        start_channels=self.args.network_channels,
        dropout=self.args.network_dropout,
        units=units,
    )
    self.model = models.Classifier(self.N_class, **classifier_options)

    state_file = self.args.classifier_file
    assert os.path.exists(state_file), 'state file %s not found' % state_file

    state = State.load(state_file)
    log('[Testing] read %s' % state_file)
    self.model.load_state_dict(state.model)

    # Only move the model if the GPU is requested and it is not there yet.
    needs_cuda = self.args.use_gpu and not cuda.is_cuda(self.model)
    if needs_cuda:
        log('[Testing] classifier is not CUDA')
        self.model = self.model.cuda()
    log('[Testing] loaded classifier')

    # !
    self.model.eval()
    log('[Testing] set classifier to eval')
def load_data(self):
    """
    Load the test images and labels used for visualization.

    Reads ``self.args.test_images_file`` and ``self.args.test_codes_file``
    and stores the images, the selected label column, the number of classes,
    the resolution (size of image axis 1) and the number of image channels
    (size of axis 3, or 1 if the image array has no fourth axis) on ``self``.
    ``self.args.max_samples`` is clamped to the number of labels; a negative
    value selects all of them.
    """

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
    log('[Visualization] read %s' % self.args.test_images_file)

    # numpy.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases, so the builtin is used directly.
    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    self.test_codes = self.test_codes[:, self.args.label_index]
    log('[Visualization] read %s' % self.args.test_codes_file)

    # Labels are used as 0-based class indices, hence the +1.
    self.N_class = numpy.max(self.test_codes) + 1
    self.resolution = self.test_images.shape[1]
    if len(self.test_images.shape) > 3:
        self.image_channels = self.test_images.shape[3]
    else:
        self.image_channels = 1
    log('[Visualization] resolution %d' % self.resolution)

    if self.args.max_samples < 0:
        self.args.max_samples = self.test_codes.shape[0]
    else:
        self.args.max_samples = min(self.args.max_samples, self.test_codes.shape[0])
def load_data(self):
    """
    Load data and model.

    Reads test/train images, labels, accuracy, perturbations and success
    indicators from the HDF5 files named in ``self.args`` and sets up an
    ``models.STNDecoder`` as ``self.model``.

    The perturbations are read with the attempt axis first. Test images,
    labels and accuracy are truncated to the number of attacked samples and
    repeated once per attempt, while perturbations and success indicators are
    transposed and flattened, so that afterwards all of these arrays have one
    row per (sample, attempt) pair in the same order.

    Raises:
        AssertionError: if the flattened perturbations do not have
            ``self.args.N_theta`` columns, contain NaN, or if ``N_theta`` is
            outside ``1..9``.
    """

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
    log('[Testing] read %s' % self.args.test_images_file)

    # For handling both color and gray images.
    if len(self.test_images.shape) < 4:
        self.test_images = numpy.expand_dims(self.test_images, axis=3)
        log('[Testing] no color images, adjusted size')

    self.resolution = self.test_images.shape[2]
    log('[Testing] resolution %d' % self.resolution)

    self.train_images = utils.read_hdf5(self.args.train_images_file).astype(numpy.float32)
    # ! Training images are flattened to one row per image.
    self.train_images = self.train_images.reshape((self.train_images.shape[0], -1))
    log('[Testing] read %s' % self.args.train_images_file)

    # numpy.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases, so the builtin is used directly.
    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    self.test_codes = self.test_codes[:, self.args.label_index]
    self.N_class = numpy.max(self.test_codes) + 1
    log('[Testing] read %s' % self.args.test_codes_file)

    self.accuracy = utils.read_hdf5(self.args.accuracy_file)
    log('[Testing] read %s' % self.args.accuracy_file)

    self.perturbations = utils.read_hdf5(self.args.perturbations_file).astype(numpy.float32)
    self.N_attempts = self.perturbations.shape[0]
    num_samples = self.perturbations.shape[1]

    # First, repeat relevant data.
    self.test_images = numpy.repeat(self.test_images[:num_samples], self.N_attempts, axis=0)
    self.perturbation_codes = numpy.repeat(self.test_codes[:num_samples], self.N_attempts, axis=0)
    self.perturbation_codes = numpy.squeeze(self.perturbation_codes)
    self.accuracy = numpy.repeat(self.accuracy[:num_samples], self.N_attempts, axis=0)

    # Then, reshape the perturbations!
    self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    self.perturbations = self.perturbations.reshape(
        (self.perturbations.shape[0] * self.perturbations.shape[1], -1))
    assert self.perturbations.shape[1] == self.args.N_theta
    log('[Testing] read %s' % self.args.perturbations_file)

    # NaN is the only value that is not equal to itself.
    assert not numpy.any(self.perturbations != self.perturbations), 'NaN in perturbations'

    self.success = utils.read_hdf5(self.args.success_file)
    self.success = numpy.swapaxes(self.success, 0, 1)
    self.success = self.success.reshape((self.success.shape[0] * self.success.shape[1]))
    log('[Testing] read %s' % self.args.success_file)

    log('[Testing] using %d input channels' % self.test_images.shape[3])
    assert self.args.N_theta > 0 and self.args.N_theta <= 9
    decoder = models.STNDecoder(self.args.N_theta)
    # NOTE(review): eval() is deliberately left disabled here as in the
    # original code; confirm whether the decoder needs it.
    # decoder.eval()
    log('[Testing] set up STN decoder')
    self.model = decoder
def main(self):
    """
    Main which should be overwritten.

    Loads the test images and labels, builds the classifier described by
    ``self.args``, restores its weights from ``self.args.state_file``,
    switches it to eval mode and finally calls ``self.test()``.

    Raises:
        AssertionError: if ``self.args.state_file`` does not exist.
    """

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
    log('[Testing] read %s' % self.args.test_images_file)

    # For handling both color and gray images.
    if len(self.test_images.shape) < 4:
        self.test_images = numpy.expand_dims(self.test_images, axis=3)
        log('[Testing] no color images, adjusted size')

    self.resolution = self.test_images.shape[2]
    log('[Testing] resolution %d' % self.resolution)

    # numpy.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases, so the builtin is used directly.
    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    self.test_codes = self.test_codes[:, self.args.label_index]
    log('[Testing] read %s' % self.args.test_codes_file)

    # Labels are used as 0-based class indices, hence the +1.
    N_class = numpy.max(self.test_codes) + 1
    network_units = list(map(int, self.args.network_units.split(',')))
    log('[Testing] using %d input channels' % self.test_images.shape[3])

    # The classifier receives (axis 3, axis 1, axis 2) of the image array.
    self.model = models.Classifier(
        N_class,
        resolution=(self.test_images.shape[3], self.test_images.shape[1], self.test_images.shape[2]),
        architecture=self.args.network_architecture,
        activation=self.args.network_activation,
        batch_normalization=not self.args.network_no_batch_normalization,
        start_channels=self.args.network_channels,
        dropout=self.args.network_dropout,
        units=network_units)

    assert os.path.exists(self.args.state_file), 'state file %s not found' % self.args.state_file
    state = State.load(self.args.state_file)
    log('[Testing] read %s' % self.args.state_file)

    self.model.load_state_dict(state.model)
    if self.args.use_gpu and not cuda.is_cuda(self.model):
        log('[Testing] model is not CUDA')
        self.model = self.model.cuda()
    log('[Testing] loaded model')

    self.model.eval()
    log('[Testing] set classifier to eval')

    self.test()
def load_data(self):
    """
    Load the test codes and theta values used by the attack.

    Column 1 of the codes file is stored as ``self.test_fonts`` and column 2
    as ``self.test_classes``. The element-wise minimum/maximum of the theta
    values along axis 0 are stored as ``self.min_bound``/``self.max_bound``.
    ``self.args.max_samples`` is clamped to the number of theta rows; a
    negative value selects all of them.
    """

    # numpy.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases, so the builtin is used directly.
    test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    self.test_fonts = test_codes[:, 1]
    self.test_classes = test_codes[:, 2]
    log('[Attack] read %s' % self.args.test_codes_file)

    self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(numpy.float32)
    log('[Attack] read %s' % self.args.test_theta_file)

    self.min_bound = numpy.min(self.test_theta, 0)
    self.max_bound = numpy.max(self.test_theta, 0)

    if self.args.max_samples < 0:
        self.args.max_samples = self.test_theta.shape[0]
    else:
        self.args.max_samples = min(self.args.max_samples, self.test_theta.shape[0])
def compute_statistics(self):
    """
    Compute success, confidence and ROC statistics of the attacks.

    Perturbations, success indicators and probabilities are transposed and
    flattened so that there is one row per (sample, attempt) pair. An attempt
    counts as successful when its success value is non-negative. The results
    are stored in ``self.results`` under the ``raw_*`` keys and, if
    ``self.args.results_file`` is set, written as a pickle. If
    ``self.args.plot_directory`` is set and ``utils.display()`` is true,
    several plots are written as well.
    """

    attempts = self.perturbations.shape[0]

    # Flattened for parity with the other arrays; not used further below.
    flat_perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    flat_perturbations = flat_perturbations.reshape((
        flat_perturbations.shape[0] * flat_perturbations.shape[1],
        flat_perturbations.shape[2],
    ))

    flat_success = numpy.swapaxes(self.success, 0, 1)
    flat_success = flat_success.reshape(flat_success.shape[0] * flat_success.shape[1])

    flat_probabilities = numpy.swapaxes(self.probabilities, 0, 1)
    flat_probabilities = flat_probabilities.reshape((
        flat_probabilities.shape[0] * flat_probabilities.shape[1],
        -1,
    ))
    attack_confidences = numpy.max(flat_probabilities, 1)

    # Probabilities of the attacked test samples, repeated once per attempt.
    clean_probabilities = numpy.repeat(
        self.test_probabilities[:self.success.shape[1]], attempts, axis=0)
    clean_confidences = numpy.max(clean_probabilities, 1)
    confidence_ratios = attack_confidences / clean_confidences

    successful = flat_success >= 0
    log('[Testing] %d valid attacks' % numpy.sum(successful))

    # For off-manifold attacks this should not happen, but better safe than sorry.
    if not numpy.any(successful):
        empty_keys = (
            'raw_success', 'raw_iteration', 'raw_roc',
            'raw_confidence_weighted_success', 'raw_confidence', 'raw_ratios',
        )
        for key in empty_keys:
            self.results[key] = 0
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
        log('[Testing] no successful attacks found, no plots')
        return

    if self.args.plot_directory and utils.display():
        successful_iterations = flat_success[successful]
        bins = numpy.arange(numpy.max(successful_iterations) + 1)
        counts = numpy.bincount(successful_iterations)
        plot_file = os.path.join(self.args.plot_directory, 'iterations')
        plot.bar(plot_file, bins, counts,
                 title='Distribution of Iterations of Successful Attacks',
                 xlabel='Number of Iterations', ylabel='Count')
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'probabilities')
        plot.histogram(plot_file, attack_confidences[successful], 50)
        log('[Testing] wrote %s' % plot_file)

        plot_file = os.path.join(self.args.plot_directory, 'probability_ratios')
        plot.histogram(plot_file, confidence_ratios, 50)
        log('[Testing] wrote %s' % plot_file)

        # Probability assigned to the label of each test sample.
        sample_indices = numpy.arange(self.test_probabilities.shape[0])
        label_probabilities = self.test_probabilities[sample_indices, self.test_codes]
        plot_file = os.path.join(self.args.plot_directory, 'test_probabilities')
        plot.histogram(plot_file, label_probabilities, 50)
        log('[Testing] wrote %s' % plot_file)

    # ROC AUC with attack confidences labelled 0 and clean confidences labelled 1.
    roc_labels = numpy.concatenate((
        numpy.zeros(attack_confidences.shape[0]),
        numpy.ones(clean_confidences.shape[0]),
    ))
    roc_scores = numpy.concatenate((attack_confidences, clean_confidences))
    self.results['raw_roc'] = sklearn.metrics.roc_auc_score(roc_labels, roc_scores)

    successful_confidence_sum = numpy.sum(attack_confidences[successful])
    self.results['raw_confidence_weighted_success'] = (
        successful_confidence_sum / numpy.sum(clean_confidences))
    # NOTE(review): this averages the full probability rows rather than the
    # per-row maxima (the confidences) - confirm this is intended.
    self.results['raw_confidence'] = numpy.mean(flat_probabilities[successful])
    self.results['raw_ratios'] = numpy.mean(confidence_ratios[successful])
    self.results['raw_success'] = numpy.sum(successful) / flat_success.shape[0]
    self.results['raw_iteration'] = numpy.average(flat_success[successful])

    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
def compute_statistics(self):
    """
    Compute statistics based on distances.

    Only attacks that succeeded (non-negative success value) on test samples
    flagged in ``self.accuracy`` are considered. For every norm in
    ``self.norms`` the following entries of ``self.results[n]`` are filled:

    - ``raw_success``: successful attacks divided by the sum of
      ``self.accuracy``,
    - ``raw_iteration``: average success value of the successful attacks,
    - ``raw_average`` / ``raw_image``: average distance of the successful
      attacks to the reference perturbation and to the test images,
    - ``raw_class_success`` / ``raw_class_average`` / ``raw_class_image``:
      the same quantities per class.

    If ``self.args.results_file`` is set, the results are written as a
    pickle. Plots are written when ``self.args.plot_directory`` is set and
    ``utils.display()`` is true.
    """

    # That's the basis for all computation as we only want to consider
    # successful attacks on test samples that were correctly classified.
    raw_overall_success = numpy.logical_and(self.success >= 0, self.accuracy)

    # Important check, for on-manifold attack this will happen if the
    # manifold is small and the model very accurate!
    if not numpy.any(raw_overall_success):
        for n in range(len(self.norms)):
            for key in ['raw_success', 'raw_iteration', 'raw_average', 'raw_image']:
                self.results[n][key] = 0
            for key in ['raw_class_success', 'raw_class_average', 'raw_class_image']:
                self.results[n][key] = numpy.zeros((self.N_class))
        if self.args.results_file:
            utils.write_pickle(self.args.results_file, self.results)
            log('[Testing] wrote %s' % self.args.results_file)
        return

    #
    # Compute nearest neighbor statistics in image space.
    #

    if self.args.plot_directory and self.args.plot_manifolds and utils.display():
        log('[Testing] computing nearest neighbor ...')
        # The masked selection does not depend on k, so it is done once.
        successful_images = self.perturbation_images[raw_overall_success]
        nearest_neighbors_indices = self.compute_nearest_neighbors(successful_images)
        pure_perturbations = self.test_images[raw_overall_success] - successful_images
        pure_perturbations_norm = numpy.linalg.norm(pure_perturbations, ord=2, axis=1)

        for k in range(10):
            direction = successful_images - self.train_images[nearest_neighbors_indices[:, k]]
            direction_norm = numpy.linalg.norm(direction, ord=2, axis=1)
            dot_products = numpy.einsum('ij,ij->i', direction, pure_perturbations)
            dot_product_norms = numpy.multiply(pure_perturbations_norm, direction_norm)

            # Drop pairs with a (numerically) zero norm before normalizing.
            non_degenerate = dot_product_norms > 10**-8
            dot_products = dot_products[non_degenerate]
            dot_product_norms = dot_product_norms[non_degenerate]
            dot_products /= dot_product_norms
            dot_products = numpy.degrees(numpy.arccos(dot_products))

            # matplotlib's histogram plots give weird errors if there are NaN
            # values, so simple check:
            if dot_products.shape[0] > 0 and not numpy.any(dot_products != dot_products):
                plot_file = os.path.join(self.args.plot_directory, 'dot_products_nn%d' % k)
                plot.histogram(
                    plot_file, dot_products, 100,
                    xmin=numpy.min(dot_products), xmax=numpy.max(dot_products),
                    title='Dot Products Between Adversarial Perturbations and Direction to Nearest Neighbor %d' % k,
                    xlabel='Dot Product', ylabel='Count')
                log('[Testing] wrote %s' % plot_file)

    #
    # Simple statistics over the successful attacks, overall and per class,
    # for each of the norms in self.norms.
    #

    if self.args.plot_directory and utils.display():
        iterations = self.success[raw_overall_success]
        x = numpy.arange(numpy.max(iterations) + 1)
        y = numpy.bincount(iterations)
        plot_file = os.path.join(self.args.plot_directory, 'iterations')
        plot.bar(plot_file, x, y,
                 title='Distribution of Iterations of Successful Attacks',
                 xlabel='Number of Iterations', ylabel='Count')
        log('[Testing] wrote %s' % plot_file)

    # Distances are measured against a reference that is zero everywhere
    # except for a one in column 4 (only when there are more than 4 columns).
    reference_perturbations = numpy.zeros(self.perturbations.shape)
    if self.args.N_theta > 4:
        reference_perturbations[:, 4] = 1

    for n, norm in enumerate(self.norms):
        delta = numpy.linalg.norm(self.perturbations - reference_perturbations, norm, axis=1)
        image_delta = numpy.linalg.norm(self.test_images - self.perturbation_images, norm, axis=1)

        if self.args.plot_directory and utils.display():
            plot_file = os.path.join(self.args.plot_directory, 'distances_l%g' % norm)
            plot.histogram(
                plot_file, delta[raw_overall_success], 50,
                title='Distribution of $L_{%g}$ Distances of Successful Attacks' % norm,
                xlabel='Distance', ylabel='Count')
            log('[Testing] wrote %s' % plot_file)

        debug_accuracy = numpy.sum(self.accuracy) / self.accuracy.shape[0]
        debug_attack_fraction = numpy.sum(raw_overall_success) / numpy.sum(self.success >= 0)
        debug_test_fraction = numpy.sum(raw_overall_success) / numpy.sum(self.accuracy)
        log('[Testing] attacked mode accuracy: %g' % debug_accuracy)
        log('[Testing] only %g of successful attacks are valid' % debug_attack_fraction)
        log('[Testing] only %g of correct samples are successfully attacked' % debug_test_fraction)

        # raw_overall_success is guaranteed to be non-empty here because of
        # the early return above.
        N_accuracy = numpy.sum(self.accuracy)
        self.results[n]['raw_success'] = numpy.sum(raw_overall_success) / N_accuracy
        self.results[n]['raw_iteration'] = numpy.average(self.success[raw_overall_success])
        self.results[n]['raw_average'] = numpy.average(delta[raw_overall_success])
        self.results[n]['raw_image'] = numpy.average(image_delta[raw_overall_success])

        self.results[n]['raw_class_success'] = numpy.zeros((self.N_class))
        self.results[n]['raw_class_average'] = numpy.zeros((self.N_class))
        self.results[n]['raw_class_image'] = numpy.zeros((self.N_class))

        for c in range(self.N_class):
            class_mask = self.perturbation_codes == c
            N_samples = numpy.sum(self.accuracy[class_mask].astype(int))
            if N_samples <= 0:
                continue

            class_success = numpy.logical_and(raw_overall_success, class_mask)
            self.results[n]['raw_class_success'][c] = numpy.sum(class_success) / N_samples

            # Both per-class averages are taken over the same successful
            # attacks. The image average used to be guarded by an array that
            # was never filled, so it always stayed at zero.
            if numpy.any(class_success):
                self.results[n]['raw_class_average'][c] = numpy.average(delta[class_success])
                self.results[n]['raw_class_image'][c] = numpy.average(image_delta[class_success])

    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
def test_test(self):
    """
    Test on testing set.

    Runs ``self.encoder`` and ``self.decoder`` batch-wise over
    ``self.test_images``, accumulating the reconstruction error, the mean and
    variance of the codes, the reconstructed images (``self.pred_images``)
    and the codes (``self.pred_codes``). Reconstructions and codes are
    written to ``self.args.reconstruction_file`` and
    ``self.args.test_theta_file`` when those are set. Afterwards the smallest
    threshold (in steps of 0.1, starting at 1.0) is searched such that at
    least 90% of the codes have a maximum absolute value below it, and
    optional scatter plots of the code dimensions are written.

    Raises:
        AssertionError: if the number of predicted images (or codes, when
            they are written) differs from the number of test images.
    """

    num_batches = int(math.ceil(self.test_images.shape[0] / self.args.batch_size))

    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, self.test_images.shape[0])

        batch_images = common.torch.as_variable(self.test_images[b_start:b_end], self.args.use_gpu)
        # Move image axis 3 to position 1 before feeding the networks.
        batch_images = batch_images.permute(0, 3, 1, 2)

        # Important to get the correct codes!
        output_codes, _ = self.encoder(batch_images)
        output_images = self.decoder(output_codes)
        e = self.reconstruction_loss(batch_images, output_images)

        self.reconstruction_error += e.data
        self.code_mean += torch.mean(output_codes).item()
        self.code_var += torch.var(output_codes).item()

        output_images = numpy.transpose(output_images.cpu().detach().numpy(), (0, 2, 3, 1))
        # Only drop a singleton last axis. An unrestricted squeeze would also
        # drop the batch axis whenever a batch holds a single image (e.g. the
        # last, partially filled batch) and break the concatenation below.
        if output_images.shape[3] == 1:
            output_images = numpy.squeeze(output_images, axis=3)
        self.pred_images = common.numpy.concatenate(self.pred_images, output_images)

        output_codes = output_codes.cpu().detach().numpy()
        self.pred_codes = common.numpy.concatenate(self.pred_codes, output_codes)

        if b % 100 == 50:
            log('[Testing] %d' % b)

    assert self.pred_images.shape[0] == self.test_images.shape[0], 'computed invalid number of test images'
    if self.args.reconstruction_file:
        utils.write_hdf5(self.args.reconstruction_file, self.pred_images)
        log('[Testing] wrote %s' % self.args.reconstruction_file)

    if self.args.test_theta_file:
        assert self.pred_codes.shape[0] == self.test_images.shape[0], 'computed invalid number of test codes'
        utils.write_hdf5(self.args.test_theta_file, self.pred_codes)
        log('[Testing] wrote %s' % self.args.test_theta_file)

    # Increase the threshold until at least 90% of the codes lie within it,
    # measured by the largest absolute entry of each code.
    threshold = 0.9
    percentage = 0
    values = numpy.max(numpy.abs(self.pred_codes), axis=1)

    while percentage < 0.9:
        threshold += 0.1
        percentage = numpy.sum(values <= threshold) / float(values.shape[0])
        log('[Testing] threshold %g percentage %g' % (threshold, percentage))
    log('[Testing] taking threshold %g with percentage %g' % (threshold, percentage))

    if self.args.output_directory and utils.display():
        # Scatter dimension 0 against every other dimension, colored by
        # whether the code lies within the threshold.
        within_threshold = (values <= threshold).astype(int)
        for d in range(1, self.pred_codes.shape[1]):
            plot_file = os.path.join(self.args.output_directory, 'test_codes_%s' % d)
            plot.scatter(
                plot_file, self.pred_codes[:, 0], self.pred_codes[:, d],
                within_threshold,
                ['greater %g' % threshold, 'smaller %g' % threshold],
                title='Dimensions 0 and %d of Test Codes' % d)
            log('[Testing] wrote %s' % plot_file)

    self.reconstruction_error /= num_batches
    log('[Testing] reconstruction error %g' % self.reconstruction_error)
def visualize_perturbations(self):
    """
    Visualize perturbations.

    Iterates over at most the first 1000 samples and, for every sample that
    has at least one successful attempt (non-negative success value) and is
    flagged in ``self.accuracy``, writes the original image, the attacked
    image and the normalized image perturbation for up to 6 attempts into
    ``self.args.output_directory``. Stops once more than 200 samples have
    been visualized.
    """

    num_attempts = min(self.perturbations.shape[1], 6)
    utils.makedir(self.args.output_directory)

    count = 0
    for i in range(min(1000, self.perturbations.shape[0])):
        log('[Visualization] sample %d, iterations %s and correctly classified: %s'
            % (i + 1, ' '.join(map(str, self.success[i])), self.accuracy[i]))

        if not numpy.any(self.success[i] >= 0) or not self.accuracy[i]:
            continue
        elif count > 200:
            break

        # These only depend on the sample, not on the attempt.
        image = self.test_images[i]
        image_label = numpy.argmax(self.theta_representations[i])

        for j in range(num_attempts):
            image_attack = self.perturbation_images[i][j]
            image_perturbation = image_attack - image

            # Scale to a maximum absolute value of 1 for display. An attempt
            # that left the image unchanged has a zero maximum; dividing by
            # it would fill the perturbation with NaN, so it is kept as is.
            max_image_perturbation = numpy.max(numpy.abs(image_perturbation))
            if max_image_perturbation > 0:
                image_perturbation /= max_image_perturbation

            attack_label = numpy.argmax(self.perturbation_representations[i][j])

            image_file = os.path.join(
                self.args.output_directory,
                '%d_%d_image_%d.png' % (i, j, image_label))
            attack_file = os.path.join(
                self.args.output_directory,
                '%d_%d_attack_%d.png' % (i, j, attack_label))
            perturbation_file = os.path.join(
                self.args.output_directory,
                '%d_%d_perturbation_%g.png' % (i, j, max_image_perturbation))

            vis.image(image_file, image, scale=10)
            vis.image(attack_file, image_attack, scale=10)
            vis.perturbation(perturbation_file, image_perturbation, scale=10)

        count += 1
def load_data(self):
    """
    Load the training and test data.

    Reads images, labels and theta values for the training and test sets
    from the HDF5 files named in ``self.args``, derives the number of classes
    and the theta bounds from the training set, optionally splits off the
    last ``self.args.validation_samples`` training samples for validation
    (when ``self.args.early_stopping`` is set) and finally selects
    ``self.args.training_samples`` training samples, either randomly or from
    the front. ``self.train_valid``/``self.test_valid`` flag the theta rows
    whose largest absolute entry does not exceed ``self.args.bound``. A
    warning is logged for every class that does not hold an equal share of
    the selected training samples.

    Raises:
        AssertionError: if the batch size is not a multiple of 4, the label
            column does not exist, the array lengths are inconsistent, or
            the validation split does not fit.
    """

    assert self.args.batch_size % 4 == 0

    self.train_images = utils.read_hdf5(self.args.train_images_file).astype(numpy.float32)
    log('[Training] read %s' % self.args.train_images_file)

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(numpy.float32)
    log('[Training] read %s' % self.args.test_images_file)

    # For handling both color and gray images.
    if len(self.train_images.shape) < 4:
        self.train_images = numpy.expand_dims(self.train_images, axis=3)
        self.test_images = numpy.expand_dims(self.test_images, axis=3)
        log('[Training] no color images, adjusted size')

    self.resolution = self.test_images.shape[2]
    log('[Training] resolution %d' % self.resolution)

    # numpy.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases, so the builtin is used directly.
    self.train_codes = utils.read_hdf5(self.args.train_codes_file).astype(int)
    assert self.train_codes.shape[1] >= self.args.label_index + 1
    self.train_codes = self.train_codes[:, self.args.label_index]
    log('[Training] read %s' % self.args.train_codes_file)
    self.N_class = numpy.max(self.train_codes) + 1

    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    assert self.test_codes.shape[1] >= self.args.label_index + 1
    self.test_codes = self.test_codes[:, self.args.label_index]
    log('[Training] read %s' % self.args.test_codes_file)

    self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(numpy.float32)
    log('[Training] read %s' % self.args.train_theta_file)

    assert self.test_images.shape[0] == self.test_codes.shape[0]

    self.min_bound = numpy.min(self.train_theta, axis=0)
    self.max_bound = numpy.max(self.train_theta, axis=0)
    log('[Training] min bound: %s' % ' '.join(
        ['%g' % self.min_bound[i] for i in range(self.min_bound.shape[0])]))
    log('[Training] max bound: %s' % ' '.join(
        ['%g' % self.max_bound[i] for i in range(self.max_bound.shape[0])]))

    self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(numpy.float32)
    log('[Training] read %s' % self.args.test_theta_file)

    assert self.train_codes.shape[0] == self.train_images.shape[0]
    assert self.test_codes.shape[0] == self.test_images.shape[0]
    assert self.train_theta.shape[0] == self.train_images.shape[0], '%s != %s' % (
        'x'.join(list(map(str, self.train_theta.shape))),
        'x'.join(list(map(str, self.train_images.shape))))
    assert self.test_theta.shape[0] == self.test_images.shape[0]

    # Select subset of samples
    if self.args.training_samples < 0:
        self.args.training_samples = self.train_images.shape[0]
    else:
        self.args.training_samples = min(self.args.training_samples, self.train_images.shape[0])
    log('[Training] using %d training samples' % self.args.training_samples)

    if self.args.test_samples < 0:
        self.args.test_samples = self.test_images.shape[0]
    else:
        self.args.test_samples = min(self.args.test_samples, self.test_images.shape[0])

    if self.args.early_stopping:
        assert self.args.validation_samples > 0
        assert self.args.training_samples + self.args.validation_samples <= self.train_images.shape[0]

        # The validation set is taken from the end of the training set.
        split = self.train_images.shape[0] - self.args.validation_samples
        self.val_images = self.train_images[split:]
        self.val_codes = self.train_codes[split:]
        self.train_images = self.train_images[:split]
        self.train_codes = self.train_codes[:split]
        assert self.val_images.shape[0] == self.args.validation_samples \
            and self.val_codes.shape[0] == self.args.validation_samples

    if self.args.random_samples:
        # Draw random groups of consecutive samples: pick group indices,
        # then expand each into N_class consecutive sample indices.
        # NOTE(review): the hard-coded 10 presumably is the group size and
        # only matches the tiled offsets when N_class == 10 - confirm.
        perm = numpy.random.permutation(self.train_images.shape[0] // 10)
        perm = perm[:self.args.training_samples // 10]
        perm = numpy.repeat(perm, self.N_class, axis=0) * 10 \
            + numpy.tile(numpy.array(range(self.N_class)), (perm.shape[0]))
        self.train_images = self.train_images[perm]
        self.train_codes = self.train_codes[perm]
        self.train_theta = self.train_theta[perm]
    else:
        self.train_images = self.train_images[:self.args.training_samples]
        self.train_codes = self.train_codes[:self.args.training_samples]
        self.train_theta = self.train_theta[:self.args.training_samples]

    self.train_valid = (numpy.max(numpy.abs(self.train_theta), axis=1) <= self.args.bound).astype(int)
    self.test_valid = (numpy.max(numpy.abs(self.test_theta), axis=1) <= self.args.bound).astype(int)

    # Check that the dataset is balanced.
    number_samples = self.train_codes.shape[0] // self.N_class
    for c in range(self.N_class):
        number_samples_ = numpy.sum(self.train_codes == c)
        if number_samples_ != number_samples:
            log('[Training] dataset not balanced, class %d should have %d samples but has %d'
                % (c, number_samples, number_samples_), LogLevel.WARNING)
def visualize_perturbations(self):
    """
    Visualize perturbations.

    Iterates over at most the first 1000 samples and, for every sample with
    at least one truthy entry in ``self.success`` that is also flagged in
    ``self.accuracy``, writes the original image, the attacked image and the
    normalized perturbation for up to 6 attempts into
    ``self.args.output_directory``. For perturbations with more than two
    axes, the normalized L2 magnitude along axis 2 is written as well. Stops
    once more than 200 samples have been visualized.
    """

    num_attempts = min(self.perturbations.shape[1], 6)
    utils.makedir(self.args.output_directory)

    count = 0
    for i in range(min(1000, self.perturbations.shape[0])):
        if not numpy.any(self.success[i]) or not self.accuracy[i]:
            continue
        elif count > 200:
            break

        # These only depend on the sample, not on the attempt.
        image = self.test_images[i]
        image_label = numpy.argmax(self.image_representations[i])

        for j in range(num_attempts):
            attack = self.perturbations[i][j]
            perturbation = attack - image

            # Scale to a maximum absolute value of 1 for display. An attempt
            # that left the image unchanged has a zero maximum; dividing by
            # it would fill the perturbation with NaN, so it is kept as is.
            max_perturbation = numpy.max(numpy.abs(perturbation))
            if max_perturbation > 0:
                perturbation /= max_perturbation

            attack_label = numpy.argmax(self.perturbation_representations[i][j])

            image_file = os.path.join(
                self.args.output_directory,
                '%d_%d_image_%d.png' % (i, j, image_label))
            attack_file = os.path.join(
                self.args.output_directory,
                '%d_%d_attack_%d.png' % (i, j, attack_label))
            perturbation_file = os.path.join(
                self.args.output_directory,
                '%d_%d_perturbation_%g.png' % (i, j, max_perturbation))

            vis.image(image_file, image, scale=10)
            vis.image(attack_file, attack, scale=10)
            vis.perturbation(perturbation_file, perturbation, scale=10)

            if len(perturbation.shape) > 2:
                # Collapse axis 2 into a single magnitude per position.
                perturbation_magnitude = numpy.linalg.norm(perturbation, ord=2, axis=2)
                max_perturbation_magnitude = numpy.max(numpy.abs(perturbation_magnitude))
                if max_perturbation_magnitude > 0:
                    perturbation_magnitude /= max_perturbation_magnitude

                perturbation_file = os.path.join(
                    self.args.output_directory,
                    '%d_%d_perturbation_magnitude_%g.png' % (i, j, max_perturbation_magnitude))
                vis.perturbation(perturbation_file, perturbation_magnitude, scale=10)

        count += 1
def load_data_and_model(self):
    """
    Load data and model.

    Reads test images, test codes, perturbations, success indicators, the
    optional selection and the accuracy from the HDF5 files named in
    ``self.args`` and stores them on ``self``. Then builds the classifier,
    restores its weights from ``self.args.classifier_file`` and puts it
    into eval mode as ``self.model``.

    Raises:
        AssertionError: if the classifier state file does not exist or the
            selection and success arrays have a different number of axes.
    """

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
        numpy.float32)
    # Add an explicit trailing axis if the images have fewer than 4 axes.
    if len(self.test_images.shape) < 4:
        self.test_images = numpy.expand_dims(self.test_images, axis=3)
    log('[Visualization] read %s' % self.args.test_images_file)

    # numpy.int was only an alias for the builtin int and has been removed
    # from NumPy, so use int directly.
    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    self.test_codes = self.test_codes[:, self.args.label_index]
    N_class = numpy.max(self.test_codes) + 1
    log('[Visualization] read %s' % self.args.test_codes_file)

    self.perturbations = utils.read_hdf5(
        self.args.perturbations_file).astype(numpy.float32)
    if len(self.perturbations.shape) < 5:
        self.perturbations = numpy.expand_dims(self.perturbations, axis=4)
    # Swap the first two axes; afterwards axis 0 is used to index and
    # truncate the test images.
    self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    self.test_images = self.test_images[:self.perturbations.shape[0]]
    log('[Visualization] read %s' % self.args.perturbations_file)

    self.success = utils.read_hdf5(self.args.success_file)
    self.success = numpy.swapaxes(self.success, 0, 1)
    # Convert to a boolean mask; presumably negative entries mark failed
    # attempts -- TODO confirm against the attack code.
    self.success = self.success >= 0
    log('[Visualization] read %s' % self.args.success_file)

    if self.args.selection_file:
        selection = utils.read_hdf5(self.args.selection_file)
        log('[Visualization] read %s' % self.args.selection_file)

        selection = numpy.swapaxes(selection, 0, 1)
        selection = selection[:self.success.shape[0]]
        selection = selection >= 0

        assert len(selection.shape) == len(self.success.shape)
        self.success = numpy.logical_and(self.success, selection)
        log('[Visualization] updated selection')

    self.accuracy = utils.read_hdf5(self.args.accuracy_file)
    # Report the file that was actually read (previously logged the
    # success file by mistake).
    log('[Visualization] read %s' % self.args.accuracy_file)

    log('[Visualization] using %d input channels'
        % self.test_images.shape[3])
    network_units = list(map(int, self.args.network_units.split(',')))
    self.model = models.Classifier(
        N_class,
        resolution=(self.test_images.shape[3],
                    self.test_images.shape[1],
                    self.test_images.shape[2]),
        architecture=self.args.network_architecture,
        activation=self.args.network_activation,
        batch_normalization=not self.args.network_no_batch_normalization,
        start_channels=self.args.network_channels,
        dropout=self.args.network_dropout,
        units=network_units)

    assert os.path.exists(
        self.args.classifier_file
    ), 'state file %s not found' % self.args.classifier_file
    state = State.load(self.args.classifier_file)
    log('[Visualization] read %s' % self.args.classifier_file)

    self.model.load_state_dict(state.model)
    if self.args.use_gpu and not cuda.is_cuda(self.model):
        log('[Visualization] classifier is not CUDA')
        self.model = self.model.cuda()
    log('[Visualization] loaded classifier')

    self.model.eval()
    log('[Visualization] set model to eval')
def load_data_and_model(self):
    """
    Load data and model.

    Reads test images, perturbations, success indicators, accuracy, test
    theta and test codes from the HDF5 files named in ``self.args`` and
    stores them on ``self``. Then restores the classifier
    (``self.classifier``) and one or several decoders (``self.decoder``),
    all put into eval mode. With several comma-separated decoder files the
    decoders are wrapped in a ``models.SelectiveDecoder``.

    Raises:
        AssertionError: if the classifier file or any decoder file does not
            exist, or if no decoder files are given.
    """

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
        numpy.float32)
    # Add an explicit trailing axis if the images have fewer than 4 axes.
    if len(self.test_images.shape) < 4:
        self.test_images = numpy.expand_dims(self.test_images, axis=3)
    # Resolution passed to the models: (axis 3, axis 1, axis 2) of the images.
    resolution = (self.test_images.shape[3],
                  self.test_images.shape[1],
                  self.test_images.shape[2])
    log('[Visualization] read %s' % self.args.test_images_file)

    self.perturbations = utils.read_hdf5(
        self.args.perturbations_file).astype(numpy.float32)
    self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    log('[Visualization] read %s' % self.args.perturbations_file)

    self.success = utils.read_hdf5(self.args.success_file)
    self.success = numpy.swapaxes(self.success, 0, 1)
    log('[Visualization] read %s' % self.args.success_file)

    self.accuracy = utils.read_hdf5(self.args.accuracy_file)
    # Report the file that was actually read (previously logged the
    # success file by mistake).
    log('[Visualization] read %s' % self.args.accuracy_file)

    self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
        numpy.float32)
    self.test_theta = self.test_theta[:self.perturbations.shape[0]]
    log('[Visualization] read %s' % self.args.test_theta_file)

    # numpy.int was only an alias for the builtin int and has been removed
    # from NumPy, so use int directly.
    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    self.test_codes = self.test_codes[:, self.args.label_index]
    # The class count is taken over all codes, before truncation.
    self.N_class = numpy.max(self.test_codes) + 1
    self.test_codes = self.test_codes[:self.perturbations.shape[0]]
    log('[Visualization] read %s' % self.args.test_codes_file)

    network_units = list(map(int, self.args.network_units.split(',')))
    self.classifier = models.Classifier(
        self.N_class,
        resolution=resolution,
        architecture=self.args.network_architecture,
        activation=self.args.network_activation,
        batch_normalization=not self.args.network_no_batch_normalization,
        start_channels=self.args.network_channels,
        dropout=self.args.network_dropout,
        units=network_units)

    assert os.path.exists(
        self.args.classifier_file
    ), 'state file %s not found' % self.args.classifier_file
    state = State.load(self.args.classifier_file)
    log('[Visualization] read %s' % self.args.classifier_file)

    self.classifier.load_state_dict(state.model)
    if self.args.use_gpu and not cuda.is_cuda(self.classifier):
        log('[Visualization] classifier is not CUDA')
        self.classifier = self.classifier.cuda()
    log('[Visualization] loaded classifier')

    self.classifier.eval()
    log('[Visualization] set classifier to eval')

    assert self.args.decoder_files
    decoder_files = self.args.decoder_files.split(',')
    for decoder_file in decoder_files:
        assert os.path.exists(
            decoder_file), 'could not find %s' % decoder_file

    log('[Visualization] using %d input channels'
        % self.test_images.shape[3])
    decoder_units = list(map(int, self.args.decoder_units.split(',')))

    multiple_decoders = len(decoder_files) > 1
    if multiple_decoders:
        log('[Visualization] loading multiple decoders')
    else:
        log('[Visualization] loading one decoder')

    # Single and multiple decoder files share the same construction and
    # loading steps; only the logging and the final wrapping differ.
    decoders = []
    for decoder_file in decoder_files:
        decoder = models.LearnedDecoder(
            self.args.latent_space_size,
            resolution=resolution,
            architecture=self.args.decoder_architecture,
            start_channels=self.args.decoder_channels,
            activation=self.args.decoder_activation,
            batch_normalization=not self.args.decoder_no_batch_normalization,
            units=decoder_units)

        state = State.load(decoder_file)
        decoder.load_state_dict(state.model)
        if self.args.use_gpu and not cuda.is_cuda(decoder):
            decoder = decoder.cuda()
        decoder.eval()
        decoders.append(decoder)

        if multiple_decoders:
            log('[Visualization] loaded %s' % decoder_file)

    if multiple_decoders:
        self.decoder = models.SelectiveDecoder(decoders,
                                               resolution=resolution)
    else:
        log('[Visualization] read decoder')
        self.decoder = decoders[0]
def load_data(self):
    """
    Load data.

    Reads the database, train/test images, codes and theta from the HDF5
    files named in ``self.args`` and stores them on ``self``. The database
    is flattened over its first two axes and converted to a torch variable
    (moved to the GPU if ``self.args.use_gpu``). Optionally splits off a
    validation set from the end of the training data (early stopping) and
    selects a subset of training samples, either randomly or as a prefix.
    Finally logs a warning for every class whose sample count differs from
    the balanced expectation.

    Raises:
        AssertionError: if the batch size is not divisible by 4, the codes
            do not have 3 columns, the sample counts of images, codes and
            theta disagree, or the validation configuration is invalid.
    """

    assert self.args.batch_size % 4 == 0

    self.database = utils.read_hdf5(self.args.database_file).astype(
        numpy.float32)
    log('[Training] read %s' % self.args.database_file)

    self.N_font = self.database.shape[0]
    self.N_class = self.database.shape[1]

    # Merge the first two axes of the database into one.
    self.database = self.database.reshape(
        (self.database.shape[0] * self.database.shape[1],
         self.database.shape[2], self.database.shape[3]))
    self.database = torch.from_numpy(self.database)
    if self.args.use_gpu:
        self.database = self.database.cuda()
    self.database = torch.autograd.Variable(self.database, False)

    self.train_images = utils.read_hdf5(
        self.args.train_images_file).astype(numpy.float32)
    log('[Training] read %s' % self.args.train_images_file)

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
        numpy.float32)
    log('[Training] read %s' % self.args.test_images_file)

    # For handling both color and gray images.
    if len(self.train_images.shape) < 4:
        self.train_images = numpy.expand_dims(self.train_images, axis=3)
        self.test_images = numpy.expand_dims(self.test_images, axis=3)
        log('[Training] no color images, adjusted size')
    self.resolution = self.train_images.shape[2]
    log('[Training] resolution %d' % self.resolution)

    # numpy.int was only an alias for the builtin int and has been removed
    # from NumPy, so use int directly.
    self.train_codes = utils.read_hdf5(
        self.args.train_codes_file).astype(int)
    assert self.train_codes.shape[1] == 3
    log('[Training] read %s' % self.args.train_codes_file)

    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    assert self.test_codes.shape[1] == 3
    log('[Training] read %s' % self.args.test_codes_file)

    self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(
        numpy.float32)
    log('[Training] read %s' % self.args.train_theta_file)

    # Bounds are computed on the full training theta, before subsetting.
    self.min_bound = numpy.min(self.train_theta, axis=0)
    self.max_bound = numpy.max(self.train_theta, axis=0)

    self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
        numpy.float32)
    log('[Training] read %s' % self.args.test_theta_file)

    assert self.train_codes.shape[0] == self.train_images.shape[0]
    assert self.test_codes.shape[0] == self.test_images.shape[0]
    assert self.train_theta.shape[0] == self.train_images.shape[0]
    assert self.test_theta.shape[0] == self.test_images.shape[0]

    # Select subset of samples; a negative value means "use everything".
    if self.args.training_samples < 0:
        self.args.training_samples = self.train_images.shape[0]
    else:
        self.args.training_samples = min(self.args.training_samples,
                                         self.train_images.shape[0])
    log('[Training] found %d classes' % self.N_class)
    log('[Training] using %d training samples'
        % self.args.training_samples)

    if self.args.test_samples < 0:
        self.args.test_samples = self.test_images.shape[0]
    else:
        self.args.test_samples = min(self.args.test_samples,
                                     self.test_images.shape[0])

    if self.args.early_stopping:
        assert self.args.validation_samples > 0
        assert (self.args.training_samples + self.args.validation_samples
                <= self.train_images.shape[0])

        # The validation set is taken from the end of the training data.
        # NOTE(review): train_theta is not trimmed here; it is only cut
        # down by the subset selection below -- confirm this is intended.
        self.val_images = self.train_images[
            self.train_images.shape[0] - self.args.validation_samples:]
        self.val_codes = self.train_codes[
            self.train_codes.shape[0] - self.args.validation_samples:,
            self.args.label_index]
        self.train_images = self.train_images[
            :self.train_images.shape[0] - self.args.validation_samples]
        self.train_codes = self.train_codes[
            :self.train_codes.shape[0] - self.args.validation_samples]

        assert (self.val_images.shape[0] == self.args.validation_samples
                and self.val_codes.shape[0] == self.args.validation_samples)

    if self.args.random_samples:
        # NOTE(review): the index arithmetic mixes a hard-coded 10 with
        # self.N_class; presumably samples come in consecutive groups of
        # 10 and N_class == 10 -- verify against the data layout.
        perm = numpy.random.permutation(self.train_images.shape[0] // 10)
        perm = perm[:self.args.training_samples // 10]
        perm = numpy.repeat(perm, self.N_class, axis=0) * 10 + numpy.tile(
            numpy.array(range(self.N_class)), (perm.shape[0]))
        self.train_images = self.train_images[perm]
        self.train_codes = self.train_codes[perm]
        self.train_theta = self.train_theta[perm]
    else:
        self.train_images = self.train_images[:self.args.training_samples]
        self.train_codes = self.train_codes[:self.args.training_samples]
        self.train_theta = self.train_theta[:self.args.training_samples]

    # Check that the dataset is balanced.
    number_samples = self.train_codes.shape[0] // self.N_class
    for c in range(self.N_class):
        number_samples_ = numpy.sum(
            self.train_codes[:, self.args.label_index] == c)
        if number_samples_ != number_samples:
            log(
                '[Training] dataset not balanced, class %d should have %d samples but has %d'
                % (c, number_samples, number_samples_), LogLevel.WARNING)
def load_data(self):
    """
    Load data and model.

    Reads test/train images, theta, test codes, accuracy, perturbations and
    success indicators from the HDF5 files named in ``self.args`` and
    stores them on ``self``. Theta, codes and accuracy are repeated once
    per attempt, while perturbations and success indicators are flattened
    over their first two axes after swapping them. Finally restores one or
    several decoders into ``self.model`` (eval mode); several decoders are
    wrapped in a ``models.SelectiveDecoder``.

    Raises:
        AssertionError: if the perturbations contain NaN, no decoder files
            are given, or a decoder file does not exist.
    """

    self.test_images = utils.read_hdf5(self.args.test_images_file).astype(
        numpy.float32)
    log('[Testing] read %s' % self.args.test_images_file)

    # For handling both color and gray images.
    if len(self.test_images.shape) < 4:
        self.test_images = numpy.expand_dims(self.test_images, axis=3)
        log('[Testing] no color images, adjusted size')
    self.resolution = self.test_images.shape[2]
    log('[Testing] resolution %d' % self.resolution)

    self.train_images = utils.read_hdf5(
        self.args.train_images_file).astype(numpy.float32)
    # Training images are flattened to one row per sample.
    self.train_images = self.train_images.reshape(
        (self.train_images.shape[0], -1))
    log('[Testing] read %s' % self.args.train_images_file)

    self.test_theta = utils.read_hdf5(self.args.test_theta_file).astype(
        numpy.float32)
    log('[Testing] read %s' % self.args.test_theta_file)

    self.train_theta = utils.read_hdf5(self.args.train_theta_file).astype(
        numpy.float32)
    log('[Testing] read %s' % self.args.train_theta_file)

    # numpy.int was only an alias for the builtin int and has been removed
    # from NumPy, so use int directly.
    self.test_codes = utils.read_hdf5(self.args.test_codes_file).astype(int)
    self.test_codes = self.test_codes[:, self.args.label_index]
    self.N_class = numpy.max(self.test_codes) + 1
    log('[Testing] read %s' % self.args.test_codes_file)

    self.accuracy = utils.read_hdf5(self.args.accuracy_file)
    log('[Testing] read %s' % self.args.accuracy_file)

    self.perturbations = utils.read_hdf5(
        self.args.perturbations_file).astype(numpy.float32)
    self.N_attempts = self.perturbations.shape[0]
    assert not numpy.isnan(self.perturbations).any(), 'NaN in perturbations'

    # First, repeat relevant data. The slice length must be read before
    # the perturbations are reshaped below.
    number_perturbed = self.perturbations.shape[1]
    self.perturbation_theta = numpy.repeat(
        self.test_theta[:number_perturbed], self.N_attempts, axis=0)
    self.perturbation_codes = numpy.repeat(
        self.test_codes[:number_perturbed], self.N_attempts, axis=0)
    self.perturbation_codes = numpy.squeeze(self.perturbation_codes)
    self.accuracy = numpy.repeat(
        self.accuracy[:number_perturbed], self.N_attempts, axis=0)

    # Then, reshape the perturbations!
    self.perturbations = numpy.swapaxes(self.perturbations, 0, 1)
    self.perturbations = self.perturbations.reshape(
        (self.perturbations.shape[0] * self.perturbations.shape[1], -1))
    log('[Testing] read %s' % self.args.perturbations_file)

    self.success = utils.read_hdf5(self.args.success_file)
    self.success = numpy.swapaxes(self.success, 0, 1)
    self.success = self.success.reshape(
        (self.success.shape[0] * self.success.shape[1]))
    log('[Testing] read %s' % self.args.success_file)

    assert self.args.decoder_files
    decoder_files = self.args.decoder_files.split(',')
    for decoder_file in decoder_files:
        assert os.path.exists(
            decoder_file), 'could not find %s' % decoder_file

    log('[Testing] using %d input channels' % self.test_images.shape[3])
    decoder_units = list(map(int, self.args.decoder_units.split(',')))
    # Resolution passed to the decoders: (axis 3, axis 1, axis 2) of the
    # test images.
    decoder_resolution = (self.test_images.shape[3],
                          self.test_images.shape[1],
                          self.test_images.shape[2])

    multiple_decoders = len(decoder_files) > 1
    if multiple_decoders:
        log('[Testing] loading multiple decoders')
    else:
        log('[Testing] loading one decoder')

    # Single and multiple decoder files share the same construction and
    # loading steps; only the logging and the final wrapping differ.
    decoders = []
    for decoder_file in decoder_files:
        decoder = models.LearnedDecoder(
            self.args.latent_space_size,
            resolution=decoder_resolution,
            architecture=self.args.decoder_architecture,
            start_channels=self.args.decoder_channels,
            activation=self.args.decoder_activation,
            batch_normalization=not self.args.decoder_no_batch_normalization,
            units=decoder_units)

        state = State.load(decoder_file)
        decoder.load_state_dict(state.model)
        if self.args.use_gpu and not cuda.is_cuda(decoder):
            decoder = decoder.cuda()
        decoder.eval()
        decoders.append(decoder)

        if multiple_decoders:
            log('[Testing] loaded %s' % decoder_file)

    if multiple_decoders:
        self.model = models.SelectiveDecoder(decoders,
                                             resolution=decoder_resolution)
    else:
        log('[Testing] read decoder')
        self.model = decoders[0]