def test(self):
    """
    Test classifier to identify valid samples to attack.

    Puts ``self.model`` into eval mode, runs it over ``self.test_images`` in batches and
    accumulates the absolute difference between predicted class index and ``self.test_codes``
    in ``self.accuracy``. The accumulated errors are then turned into a boolean mask
    (``True`` where the error is zero), written to ``self.args.accuracy_file``, and the
    accuracy over all samples and over the first ``self.args.max_samples`` samples is logged.
    """

    self.model.eval()
    assert self.model.training is False
    assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples have to match'

    num_samples = self.test_images.shape[0]
    # float() keeps the ceil correct even where "/" is integer division.
    num_batches = int(math.ceil(num_samples / float(self.args.batch_size)))

    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, num_samples)

        batch_images = common.torch.as_variable(self.test_images[b_start:b_end], self.args.use_gpu)
        batch_classes = common.torch.as_variable(self.test_codes[b_start:b_end], self.args.use_gpu)
        # Axis reorder before the forward pass; presumably NHWC -> NCHW (confirm against the data files).
        batch_images = batch_images.permute(0, 3, 1, 2)

        output_classes = self.model(batch_images)
        _, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
        # Zero exactly where the predicted index equals the label.
        errors = torch.abs(indices - batch_classes)

        self.accuracy = common.numpy.concatenate(self.accuracy, errors.data.cpu().numpy())
        if b % 100 == 0:
            log('[Attack] computing accuracy %d' % b)

    self.accuracy = self.accuracy == 0

    utils.write_hdf5(self.args.accuracy_file, self.accuracy)
    log('[Attack] wrote %s' % self.args.accuracy_file)

    accuracy = numpy.sum(self.accuracy) / float(self.accuracy.shape[0])
    log('[Attack] accuracy %g' % accuracy)

    # Divide by the number of samples actually considered: if max_samples exceeds the
    # number of test samples, dividing by max_samples would understate the accuracy.
    num_considered = min(self.args.max_samples, self.accuracy.shape[0])
    accuracy = numpy.sum(self.accuracy[:num_considered]) / float(num_considered)
    log('[Attack] accuracy on %d samples %g' % (num_considered, accuracy))
def test_random(self):
    """
    Test random.

    Draws 1000 latent codes from a standard normal distribution, decodes them in batches
    with ``self.decoder`` and writes the decoded images to ``self.args.random_file``.
    """

    codes = numpy.random.normal(0, 1, (1000, self.args.latent_space_size)).astype(numpy.float32)
    num_codes = codes.shape[0]
    num_batches = int(math.ceil(num_codes / float(self.args.batch_size)))

    pred_images = None
    for b in range(num_batches):
        b_start = b * self.args.batch_size
        # Clamp to the number of sampled codes. Clamping to the size of the test set (as
        # before) produced empty or truncated batches whenever the test set was smaller.
        b_end = min((b + 1) * self.args.batch_size, num_codes)
        batch_codes = common.torch.as_variable(codes[b_start:b_end], self.args.use_gpu)

        output_images = self.decoder(batch_codes)
        # Reorder axes with (0, 2, 3, 1) and drop singleton dimensions;
        # presumably NCHW -> NHWC (confirm against the decoder's output layout).
        output_images = numpy.squeeze(numpy.transpose(output_images.cpu().detach().numpy(), (0, 2, 3, 1)))
        pred_images = common.numpy.concatenate(pred_images, output_images)

        if b % 100 == 50:
            log('[Testing] %d' % b)

    utils.write_hdf5(self.args.random_file, pred_images)
    log('[Testing] wrote %s' % self.args.random_file)
def main(self):
    """
    Main method.

    Reads the database, codes and theta files, feeds every (code, theta) pair through a
    ``models.OneHotDecoder`` in batches and writes the stacked outputs to
    ``self.args.images_file``.
    """

    database = utils.read_hdf5(self.args.database_file)
    log('[Data] read %s' % self.args.database_file)

    N_font, N_class = database.shape[0], database.shape[1]
    assert database.shape[2] == database.shape[3]

    # Merge the two leading axes so that every entry has a single index.
    database = database.reshape((N_font * N_class, database.shape[2], database.shape[3]))
    database = torch.from_numpy(database).float()
    if self.args.use_gpu:
        database = database.cuda()
    database = torch.autograd.Variable(database)

    codes = utils.read_hdf5(self.args.codes_file)
    # Only the first column of the codes is used, one-hot encoded over all database entries.
    codes = common.numpy.one_hot(codes[:, 0], N_font * N_class)
    log('[Data] read %s' % self.args.codes_file)

    theta = utils.read_hdf5(self.args.theta_file)
    N, N_theta = theta.shape[0], theta.shape[1]
    log('[Data] read %s' % self.args.theta_file)

    model = models.OneHotDecoder(database, N_theta)

    batch_size = self.args.batch_size
    num_batches = int(math.ceil(float(N) / batch_size))

    images = []
    for b in range(num_batches):
        lower = b * batch_size
        upper = min(lower + batch_size, N)

        batch_theta = torch.from_numpy(theta[lower:upper]).float()
        batch_codes = torch.from_numpy(codes[lower:upper]).float()
        if self.args.use_gpu:
            batch_codes = batch_codes.cuda()
            batch_theta = batch_theta.cuda()
        batch_codes = torch.autograd.Variable(batch_codes)
        batch_theta = torch.autograd.Variable(batch_theta)

        output = model(batch_codes, batch_theta)
        images.append(output.data.cpu().numpy().squeeze())

        if b % 1000 == 0:
            log('[Data] processed %d/%d batches' % (b + 1, num_batches))

    images = numpy.concatenate(images, axis=0)
    if len(images.shape) > 3:
        # Four-dimensional results get their second axis moved to the end.
        images = numpy.transpose(images, (0, 2, 3, 1))

    utils.write_hdf5(self.args.images_file, images)
    log('[Data] wrote %s' % self.args.images_file)
def main(self):
    """
    Main.

    Reads the CelebA train and test image files, multiplies both arrays by 255 and writes
    them back (as float32) to the same files, logging the value range before and after.

    NOTE(review): the files are overwritten in place, so running this twice scales the
    data twice.
    """

    train_images_file = paths.celeba_train_images_file()
    test_images_file = paths.celeba_test_images_file()
    assert os.path.exists(train_images_file)
    assert os.path.exists(test_images_file)

    train_images = utils.read_hdf5(train_images_file)
    log('read %s' % train_images_file)
    test_images = utils.read_hdf5(test_images_file)
    log('read %s' % test_images_file)

    log('[Data] before train: %g %g' % (numpy.min(train_images), numpy.max(train_images)))
    # Report the range of the test array here; the train range was logged twice before.
    log('[Data] before test: %g %g' % (numpy.min(test_images), numpy.max(test_images)))

    train_images *= 255
    test_images *= 255

    log('[Data] after train: %g %g' % (numpy.min(train_images), numpy.max(train_images)))
    log('[Data] after test: %g %g' % (numpy.min(test_images), numpy.max(test_images)))

    utils.write_hdf5(train_images_file, train_images.astype(numpy.float32))
    log('[Data] wrote %s' % train_images_file)
    utils.write_hdf5(test_images_file, test_images.astype(numpy.float32))
    log('[Data] wrote %s' % test_images_file)
def convert_dataset():
    """
    Convert MNIST.

    Reads the gzipped raw image and label files and writes them as HDF5: images as float32
    arrays of shape (N, 28, 28, 1) scaled by 1/255, labels as integer arrays of shape (N, 1).
    """

    filenames = [
        [paths.raw_mnist_train_images_file(), paths.mnist_train_images_file()],
        [paths.raw_mnist_test_images_file(), paths.mnist_test_images_file()],
        [paths.raw_mnist_train_labels_file(), paths.mnist_train_labels_file()],
        [paths.raw_mnist_test_labels_file(), paths.mnist_test_labels_file()],
    ]

    # The first two entries are image files: skip the 16 byte header.
    for raw_file, hdf5_file in filenames[:2]:
        with gzip.open(raw_file, 'rb') as f:
            data = numpy.frombuffer(f.read(), numpy.uint8, offset=16).reshape(-1, 28, 28, 1)
        data = data.astype(numpy.float32) / 255.
        utils.write_hdf5(hdf5_file, data)
        log('wrote %s' % hdf5_file)

    # The last two entries are label files: skip the 8 byte header.
    for raw_file, hdf5_file in filenames[-2:]:
        with gzip.open(raw_file, 'rb') as f:
            labels = numpy.frombuffer(f.read(), numpy.uint8, offset=8).reshape(-1, 1)
        # numpy.int was only an alias of the builtin int and no longer exists in
        # current NumPy releases; the builtin yields the same dtype.
        utils.write_hdf5(hdf5_file, labels.astype(int))
        log('wrote %s' % hdf5_file)
def loop(self):
    """
    Main loop for training and testing, saving ...

    Per epoch: test, train, optionally validate, replace the previous epoch's checkpoint
    with the current one, write the statistics files and plot. After the last epoch a final
    test is run, the final checkpoint is written to ``self.args.state_file`` and the
    collected statistics are stored in ``self.results`` (and pickled if requested).
    """

    while self.epoch < self.args.epochs:
        log('[Training] %s' % self.scheduler.report())

        # Testing comes first so that the error of the untrained model is recorded, too.
        run_test = functools.partial(self.test)
        testing = elapsed(run_test)
        run_train = functools.partial(self.train)
        training = elapsed(run_train)
        log('[Training] %gs training, %gs testing' % (training, testing))

        if self.args.early_stopping:
            run_validate = functools.partial(self.validate)
            validation = elapsed(run_validate)
            log('[Training] %gs validation' % validation)

        # Keep only the most recent per-epoch checkpoint.
        previous_state_file = self.args.state_file + '.%d' % (self.epoch - 1)
        utils.remove(previous_state_file)
        current_state_file = self.args.state_file + '.%d' % self.epoch
        State.checkpoint(self.model, self.scheduler.optimizer, self.epoch, current_state_file)
        log('[Training] %d: checkpoint' % self.epoch)

        torch.cuda.empty_cache()  # necessary?

        # Statistics are rewritten every epoch.
        statistics_outputs = (
            (self.args.training_file, self.train_statistics),
            (self.args.testing_file, self.test_statistics),
        )
        for statistics_file, statistics in statistics_outputs:
            if statistics_file:
                utils.write_hdf5(statistics_file, statistics)
                log('[Training] %d: wrote %s' % (self.epoch, statistics_file))

        if utils.display():
            self.plot()

        self.epoch += 1  # !

    # Final testing.
    testing = elapsed(functools.partial(self.test))
    log('[Training] %gs testing' % (testing))

    # Replace the last per-epoch checkpoint by the final state file.
    utils.remove(self.args.state_file + '.%d' % (self.epoch - 1))
    State.checkpoint(self.model, self.scheduler.optimizer, self.epoch, self.args.state_file)
    log('[Training] %d: checkpoint' % self.epoch)

    self.results = {
        'training_statistics': self.train_statistics,
        'testing_statistics': self.test_statistics,
    }
    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Training] wrote %s' % self.args.results_file)
def test(self):
    """
    Compute the images corresponding to all perturbations and write them.

    For every batch, the font and class one-hot codes are concatenated and fixed on the model
    via ``set_code``; the perturbations are then passed through the model. All outputs are
    collected in ``self.perturbation_images``, reshaped to
    (N_samples, N_attempts, ...), the first two axes are swapped and the result is written
    to ``self.args.perturbation_images_file``.
    """

    batch_size = self.args.batch_size
    total = self.perturbations.shape[0]
    num_batches = int(math.ceil(total / batch_size))

    for b in range(num_batches):
        batch = slice(b * batch_size, min((b + 1) * batch_size, total))

        fonts_one_hot = common.numpy.one_hot(self.test_fonts[batch], self.N_font)
        classes_one_hot = common.numpy.one_hot(self.test_classes[batch], self.N_class)
        batch_code = numpy.concatenate((fonts_one_hot, classes_one_hot), axis=1).astype(numpy.float32)

        batch_inputs = common.torch.as_variable(self.perturbations[batch], self.args.use_gpu)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

        # With the font/class code fixed, the model input is theta only.
        self.model.set_code(batch_code)
        output_images = self.model(batch_inputs)

        output_images = output_images.cpu().detach().numpy()
        output_images = numpy.squeeze(numpy.transpose(output_images, (0, 2, 3, 1)))
        self.perturbation_images = common.numpy.concatenate(self.perturbation_images, output_images)

        if b % 100 == 0:
            log('[Testing] computing perturbation images %d' % b)

    utils.makedir(os.path.dirname(self.args.perturbation_images_file))

    # Split the leading axis into (samples, attempts); a fourth axis is kept if present.
    shape = self.perturbation_images.shape
    target_shape = (self.N_samples, self.N_attempts, shape[1], shape[2])
    if len(shape) > 3:
        target_shape += (shape[3],)
    self.perturbation_images = self.perturbation_images.reshape(target_shape)

    # Attempts first, samples second.
    self.perturbation_images = numpy.swapaxes(self.perturbation_images, 0, 1)

    utils.write_hdf5(self.args.perturbation_images_file, self.perturbation_images)
    log('[Testing] wrote %s' % self.args.perturbation_images_file)
def download():
    """
    Download and convert Cifar10.

    Downloads the train and test split through torchvision and writes images
    (float32, scaled by 1/255) and labels (integer, shape (N, 1)) as HDF5 files.
    """

    def to_arrays(dataset, legacy_prefix):
        """ Return (images, labels) of a CIFAR10 dataset object as numpy arrays. """

        # Newer torchvision releases expose `data`/`targets`; older ones used
        # `train_data`/`train_labels` and `test_data`/`test_labels`. Support both.
        if hasattr(dataset, 'data') and hasattr(dataset, 'targets'):
            return numpy.array(dataset.data), numpy.array(dataset.targets)
        return (numpy.array(getattr(dataset, legacy_prefix + '_data')),
                numpy.array(getattr(dataset, legacy_prefix + '_labels')))

    trainset = torchvision.datasets.CIFAR10(root=paths.raw_cifar10_dir(), train=True, download=True)
    testset = torchvision.datasets.CIFAR10(root=paths.raw_cifar10_dir(), train=False, download=True)

    train_images, train_labels = to_arrays(trainset, 'train')
    test_images, test_labels = to_arrays(testset, 'test')

    # Sanity check that the raw images are in [0, 255] before rescaling.
    assert numpy.max(train_images) == 255

    train_images = train_images / 255.
    test_images = test_images / 255.

    utils.write_hdf5(paths.cifar10_train_images_file(), train_images.astype(numpy.float32))
    log('wrote %s' % paths.cifar10_train_images_file())
    utils.write_hdf5(paths.cifar10_test_images_file(), test_images.astype(numpy.float32))
    log('wrote %s' % paths.cifar10_test_images_file())

    # numpy.int was only an alias of the builtin int and no longer exists in current NumPy.
    utils.write_hdf5(paths.cifar10_train_labels_file(), train_labels.reshape(-1, 1).astype(int))
    log('wrote %s' % paths.cifar10_train_labels_file())
    utils.write_hdf5(paths.cifar10_test_labels_file(), test_labels.reshape(-1, 1).astype(int))
    log('wrote %s' % paths.cifar10_test_labels_file())
def convert_dataset():
    """
    Convert SVHN.

    For the train and the test split, loads the raw .mat file, moves the last axis of ``X``
    to the front, scales the images by 1/255 and writes images (float32) and labels
    (``y - 1``, integer, shape (N, 1)) as HDF5 files.
    """

    # Path getters are called lazily, in the same order as when the splits were
    # handled by two separate copies of this code.
    splits = [
        (paths.raw_svhn_train_file, paths.svhn_train_images_file, paths.svhn_train_labels_file),
        (paths.raw_svhn_test_file, paths.svhn_test_images_file, paths.svhn_test_labels_file),
    ]

    for raw_file, images_file, labels_file in splits:
        data = sio.loadmat(raw_file())

        # The sample index is the last axis of X; make it the first one.
        images = data['X'].transpose(3, 0, 1, 2)
        images = images / 255.
        # Shift the stored labels down by one so that they start at zero.
        labels = data['y'] - 1

        utils.write_hdf5(images_file(), images.astype(numpy.float32))
        log('wrote %s' % images_file())
        # numpy.int was only an alias of the builtin int and no longer exists in current NumPy.
        utils.write_hdf5(labels_file(), labels.reshape(-1, 1).astype(int))
        log('wrote %s' % labels_file())
def test(self):
    """
    Compute the images corresponding to all perturbations and write them.

    For every batch, the matching test images are set on the model via ``set_image`` and the
    perturbations are passed through it. All outputs are collected in
    ``self.perturbation_images``, reshaped to (N_samples, N_attempts, ...), the first two
    axes are swapped and the result is written to ``self.args.perturbation_images_file``.
    """

    batch_size = self.args.batch_size
    total = self.perturbations.shape[0]
    num_batches = int(math.ceil(total / batch_size))

    for b in range(num_batches):
        batch = slice(b * batch_size, min((b + 1) * batch_size, total))

        batch_images = common.torch.as_variable(self.test_images[batch], self.args.use_gpu)
        batch_inputs = common.torch.as_variable(self.perturbations[batch], self.args.use_gpu)

        # The model works on the images set here; its input is the perturbation only.
        self.model.set_image(batch_images)
        output_images = self.model(batch_inputs)

        output_images = output_images.cpu().detach().numpy()
        output_images = numpy.squeeze(numpy.transpose(output_images, (0, 2, 3, 1)))
        self.perturbation_images = common.numpy.concatenate(self.perturbation_images, output_images)

        if b % 100 == 0:
            log('[Testing] computing perturbation images %d' % b)

    utils.makedir(os.path.dirname(self.args.perturbation_images_file))

    # Split the leading axis into (samples, attempts); a fourth axis is kept if present.
    shape = self.perturbation_images.shape
    target_shape = (self.N_samples, self.N_attempts, shape[1], shape[2])
    if len(shape) > 3:
        target_shape += (shape[3],)
    self.perturbation_images = self.perturbation_images.reshape(target_shape)

    # Attempts first, samples second.
    self.perturbation_images = numpy.swapaxes(self.perturbation_images, 0, 1)

    utils.write_hdf5(self.args.perturbation_images_file, self.perturbation_images)
    log('[Testing] wrote %s' % self.args.perturbation_images_file)
def main(self):
    """
    Main.

    Parses the raw CelebA labels file (first non-empty line skipped, second line holding the
    attribute names, one row of values per remaining line), maps -1 to 0, logs per-attribute
    statistics and writes a 90%/10% train/test split as HDF5.
    """

    with open(paths.raw_celeba_labels_file(), 'r') as f:
        lines = [line for line in f.read().split('\n') if line]

    # lines[0] is skipped, lines[1] lists the attribute names, the rest are label rows.
    attributes = [str(attribute) for attribute in lines[1].split(' ') if attribute]

    labels = []
    for line in lines[2:]:
        # The first field of each row is dropped; the remaining ones are the attribute values.
        values = [int(value) for value in line.split(' ')[1:] if value]
        assert len(values) == len(attributes)
        labels.append(values)

    labels = numpy.array(labels)
    labels[labels == -1] = 0

    def statistics(labels):
        """ Label statistics. """

        for i, attribute in enumerate(attributes):
            positive = numpy.sum(labels[:, i] == 1)
            negative = numpy.sum(labels[:, i] == 0)
            log('%d. attribute %s: %d %d' % (i, attribute, positive, negative))

    N = labels.shape[0]
    N_train = int(0.9 * N)
    train_labels = labels[:N_train]
    test_labels = labels[N_train:]

    statistics(labels)
    statistics(train_labels)
    statistics(test_labels)

    # NOTE(review): reshape(-1, 1) flattens the (samples, attributes) matrix into a single
    # column of length samples * attributes -- confirm that downstream readers expect this.
    # numpy.int was only an alias of the builtin int and no longer exists in current NumPy.
    utils.write_hdf5(paths.celeba_train_labels_file(), train_labels.reshape(-1, 1).astype(int))
    log('wrote %s' % paths.celeba_train_labels_file())
    utils.write_hdf5(paths.celeba_test_labels_file(), test_labels.reshape(-1, 1).astype(int))
    log('wrote %s' % paths.celeba_test_labels_file())
def sample(self):
    """
    Sample latent codes and decode them.

    Draws ``self.args.N_samples`` codes from a truncated normal distribution bounded by
    ``self.args.bound``, decodes them in batches with ``self.decoder`` (in eval mode) and
    optionally writes the images and the sampled codes.
    """

    assert self.decoder is not None

    self.decoder.eval()
    log('[Sampling] set decoder to eval')

    sample_shape = (self.args.N_samples, self.args.latent_space_size)
    theta = common.numpy.truncated_normal(sample_shape, lower=-self.args.bound, upper=self.args.bound)
    theta = theta.astype(numpy.float32)

    batch_size = self.args.batch_size
    total = theta.shape[0]
    num_batches = int(math.ceil(total / batch_size))

    images = None
    for b in range(num_batches):
        lower = b * batch_size
        upper = min(lower + batch_size, total)
        batch_theta = common.torch.as_variable(theta[lower:upper], self.args.use_gpu)

        # The decoder has to be in eval mode to obtain the correct outputs.
        assert self.decoder.training is False
        output_images = self.decoder(batch_theta)

        output_images = output_images.cpu().detach().numpy()
        output_images = numpy.squeeze(numpy.transpose(output_images, (0, 2, 3, 1)))
        images = common.numpy.concatenate(images, output_images)

        if b % 100 == 50:
            log('[Sampling] %d' % b)

    if self.args.images_file:
        utils.write_hdf5(self.args.images_file, images)
        log('[Sampling] wrote %s' % self.args.images_file)

    if self.args.theta_file:
        utils.write_hdf5(self.args.theta_file, theta)
        log('[Sampling] wrote %s' % self.args.theta_file)
def test(self):
    """
    Test classifier to identify valid samples to attack.

    Runs ``self.model`` over ``self.test_theta`` in batches (setting the batch codes on the
    decoder first if it is a ``models.SelectiveDecoder``) and accumulates the absolute
    difference between predicted class index and ``self.test_codes`` in ``self.accuracy``.
    The errors are turned into a boolean mask (``True`` where the error is zero), written to
    ``self.args.accuracy_file``, and the accuracy over all samples and over the first
    ``self.args.max_samples`` samples is logged.
    """

    num_samples = self.test_theta.shape[0]
    # float() keeps the ceil correct even where "/" is integer division.
    num_batches = int(math.ceil(num_samples / float(self.args.batch_size)))

    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, num_samples)

        batch_classes = common.torch.as_variable(self.test_codes[b_start:b_end], self.args.use_gpu)
        batch_inputs = common.torch.as_variable(self.test_theta[b_start:b_end], self.args.use_gpu)

        if isinstance(self.model.decoder, models.SelectiveDecoder):
            self.model.decoder.set_code(batch_classes)

        output_classes = self.model(batch_inputs)
        _, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
        # Zero exactly where the predicted index equals the label.
        errors = torch.abs(indices - batch_classes)

        self.accuracy = common.numpy.concatenate(self.accuracy, errors.data.cpu().numpy())
        if b % 100 == 0:
            log('[Attack] computing accuracy %d' % b)

    self.accuracy = self.accuracy == 0

    utils.write_hdf5(self.args.accuracy_file, self.accuracy)
    log('[Attack] wrote %s' % self.args.accuracy_file)

    accuracy = numpy.sum(self.accuracy) / float(self.accuracy.shape[0])
    log('[Attack] accuracy %g' % accuracy)

    # Divide by the number of samples actually considered: if max_samples exceeds the
    # number of test samples, dividing by max_samples would understate the accuracy.
    num_considered = min(self.args.max_samples, self.accuracy.shape[0])
    accuracy = numpy.sum(self.accuracy[:num_considered]) / float(num_considered)
    log('[Attack] accuracy on %d samples %g' % (num_considered, accuracy))
def test_interpolation(self):
    """
    Test interpolation.

    For up to 50 predicted codes, linearly interpolates in 10 steps between the code and a
    randomly chosen other predicted code, decodes all interpolated codes in batches with
    ``self.decoder`` and writes the images to ``self.args.interpolation_file``.
    """

    num_codes = self.pred_codes.shape[0]
    perm = numpy.random.permutation(num_codes)

    interpolations = None
    # Never index past the available codes when fewer than 50 were predicted.
    for i in range(min(50, num_codes)):
        first = self.pred_codes[i]
        second = self.pred_codes[perm[i]]
        linfit = scipy.interpolate.interp1d([0, 1], numpy.vstack([first, second]), axis=0)
        interpolations = common.numpy.concatenate(interpolations, linfit(numpy.linspace(0, 1, 10)))

    interpolations = interpolations.astype(numpy.float32)
    num_interpolations = interpolations.shape[0]
    num_batches = int(math.ceil(num_interpolations / float(self.args.batch_size)))

    pred_images = None
    for b in range(num_batches):
        b_start = b * self.args.batch_size
        # Clamp to the number of interpolated codes. Clamping to the size of the test set
        # (as before) produced empty or truncated batches whenever the test set was smaller.
        b_end = min((b + 1) * self.args.batch_size, num_interpolations)
        batch_codes = common.torch.as_variable(interpolations[b_start:b_end], self.args.use_gpu)

        output_images = self.decoder(batch_codes)
        # Reorder axes with (0, 2, 3, 1) and drop singleton dimensions;
        # presumably NCHW -> NHWC (confirm against the decoder's output layout).
        output_images = numpy.squeeze(numpy.transpose(output_images.cpu().detach().numpy(), (0, 2, 3, 1)))
        pred_images = common.numpy.concatenate(pred_images, output_images)

        if b % 100 == 50:
            log('[Testing] %d' % b)

    utils.write_hdf5(self.args.interpolation_file, pred_images)
    log('[Testing] wrote %s' % self.args.interpolation_file)
def create_hdf5(img_h5_path, label_h5_path, img_label_path, sep=',', keys='tensor'):
    """
    Convert a list of image files with labels into an image and a label HDF5 file.

    :param img_h5_path: path of the image HDF5 file to write
    :param label_h5_path: path of the label HDF5 file to write
    :param img_label_path: path of a delimited text file with one "image path, label" row per image
    :param sep: column separator of ``img_label_path``
    :param keys: key passed on to ``utils.write_hdf5``
    """

    assert os.path.isfile(img_label_path)
    df = pd.read_csv(img_label_path, sep=sep, names=['name', 'label'])

    # Store image names and labels in numpy arrays.
    img_paths = df['name'].to_numpy().astype(str)
    # np.int was only an alias of the builtin int and no longer exists in current NumPy.
    labels = df['label'].to_numpy().astype(int)

    # Check that labels are scalar integers.
    assert len(labels.shape) == 1
    # Check that number of images and labels are equal.
    assert img_paths.shape[0] == labels.shape[0]

    transform = transforms.ToTensor()

    def load_image(path):
        """ Load one image as H x W x C array, closing the underlying file afterwards. """

        with Image.open(path) as handle:
            img = transform(handle.convert('RGB'))
        assert len(img.shape) == 3
        # Change from C x H x W format to H x W x C format as expected by test.attack.
        return img.permute(1, 2, 0).numpy()

    # The first image determines the shape of all images; it is decoded only once.
    first = load_image(img_paths[0])
    images = np.empty((len(img_paths),) + tuple(first.shape), dtype=np.float32)
    images[0] = first
    for idx, img_path in enumerate(img_paths[1:], start=1):
        images[idx] = load_image(img_path)

    print("Writing image hdf5...")
    utils.write_hdf5(img_h5_path, images, keys)
    print("Writing label hdf5...")
    utils.write_hdf5(label_h5_path, labels, keys)
def main(self):
    """
    Main.

    Reads all raw CelebA images, resizes each to a width of 54 pixels (height following the
    aspect ratio), crops 5 rows at the top and bottom and 3 columns at the left and right, and
    writes a 90%/10% train/test split as float32 HDF5 files.
    """

    filepaths = utils.read_ordered_directory(paths.raw_celeba_images_dir())
    log('reading %s' % paths.raw_celeba_images_dir())

    width = 54
    images = []
    for filepath in filepaths:
        log('processing %s' % os.path.basename(filepath))

        image = imageio.imread(filepath)
        height = int(width * image.shape[0] / float(image.shape[1]))
        image = skimage.transform.resize(image, (height, width))

        rows, columns = image.shape[0], image.shape[1]
        image = image[5:rows - 5, 3:columns - 3, :]

        # No division by 255 needed: the values are expected to lie in [0, 1] at this point.
        assert numpy.min(image) >= 0 and numpy.max(image) <= 1
        images.append(image)

    images = numpy.array(images)
    log('%g %g' % (numpy.min(images), numpy.max(images)))

    N = images.shape[0]
    N_train = int(0.9 * N)

    outputs = (
        (paths.celeba_train_images_file, images[:N_train]),
        (paths.celeba_test_images_file, images[N_train:]),
    )
    for images_file, split_images in outputs:
        utils.write_hdf5(images_file(), split_images.astype(numpy.float32))
        log('wrote %s' % images_file())
def test(self):
    """
    Test classifier to identify valid samples to attack.

    For every batch, the font and class one-hot codes are concatenated and fixed on the
    decoder via ``set_code``; ``self.model`` is then run on ``self.test_theta`` and the
    absolute difference between predicted class index and ``self.test_classes`` is accumulated
    in ``self.accuracy``. The errors are turned into a boolean mask (``True`` where the error
    is zero), written to ``self.args.accuracy_file``, and the accuracy over all samples and
    over the first ``self.args.max_samples`` samples is logged.
    """

    num_samples = self.test_theta.shape[0]
    # float() keeps the ceil correct even where "/" is integer division.
    num_batches = int(math.ceil(num_samples / float(self.args.batch_size)))

    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, num_samples)

        batch_fonts = self.test_fonts[b_start:b_end]
        batch_classes = self.test_classes[b_start:b_end]
        batch_code = numpy.concatenate((
            common.numpy.one_hot(batch_fonts, self.N_font),
            common.numpy.one_hot(batch_classes, self.N_class),
        ), axis=1).astype(numpy.float32)

        batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
        batch_inputs = common.torch.as_variable(self.test_theta[b_start:b_end], self.args.use_gpu)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

        # This basically allows to only optimize over theta, keeping the font/class code fixed.
        self.model.decoder.set_code(batch_code)

        output_classes = self.model(batch_inputs)
        _, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
        # Zero exactly where the predicted index equals the label.
        errors = torch.abs(indices - batch_classes)

        self.accuracy = common.numpy.concatenate(self.accuracy, errors.data.cpu().numpy())
        if b % 100 == 0:
            log('[Attack] computing accuracy %d' % b)

    self.accuracy = self.accuracy == 0

    utils.write_hdf5(self.args.accuracy_file, self.accuracy)
    log('[Attack] wrote %s' % self.args.accuracy_file)

    accuracy = numpy.sum(self.accuracy) / float(self.accuracy.shape[0])
    log('[Attack] accuracy %g' % accuracy)

    # Divide by the number of samples actually considered: if max_samples exceeds the
    # number of test samples, dividing by max_samples would understate the accuracy.
    num_considered = min(self.args.max_samples, self.accuracy.shape[0])
    accuracy = numpy.sum(self.accuracy[:num_considered]) / float(num_considered)
    log('[Attack] accuracy on %d samples %g' % (num_considered, accuracy))
def main(self):
    """
    Main method.

    Reads codes, theta and images and splits each of them at ``self.args.N_train`` into a
    training part (the first ``N_train`` entries) and a test part (the remainder), writing
    every part to its own file.
    """

    codes = utils.read_hdf5(self.args.codes_file)
    log('[Data] read %s' % self.args.codes_file)

    theta = utils.read_hdf5(self.args.theta_file)
    log('[Data] read %s' % self.args.theta_file)

    images = utils.read_hdf5(self.args.images_file)
    log('[Data] read %s' % self.args.images_file)

    #
    # The split is deliberately not random: taking the leading entries keeps
    # training set subselection simple while enforcing balanced datasets.
    # For example, for 10 classes, every subset that is a multiple of 10 will
    # be balanced by construction.
    #

    N_train = self.args.N_train
    outputs = (
        (self.args.train_codes_file, codes[:N_train]),
        (self.args.test_codes_file, codes[N_train:]),
        (self.args.train_theta_file, theta[:N_train]),
        (self.args.test_theta_file, theta[N_train:]),
        (self.args.train_images_file, images[:N_train]),
        (self.args.test_images_file, images[N_train:]),
    )

    for filepath, data in outputs:
        utils.write_hdf5(filepath, data)
        log('[Data] wrote %s' % filepath)
def test(self):
    """
    Test the model.

    Runs ``self.model`` over ``self.test_images`` in batches and computes the mean
    cross-entropy loss (``self.loss``) and the classification error (``self.error``) over all
    test samples. Per-sample errors are accumulated in ``self.accuracy`` and turned into a
    boolean mask (``True`` where the prediction is correct), which is optionally written to
    ``self.args.accuracy_file``. Loss and error are stored in ``self.results`` and optionally
    pickled to ``self.args.results_file``.
    """

    assert self.model is not None
    assert self.model.training is False
    assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples have to match'

    self.loss = 0.
    self.error = 0.

    num_samples = self.test_images.shape[0]
    # float() keeps the ceil correct even where "/" is integer division.
    num_batches = int(math.ceil(num_samples / float(self.args.batch_size)))

    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, num_samples)

        batch_images = common.torch.as_variable(self.test_images[b_start:b_end], self.args.use_gpu)
        batch_classes = common.torch.as_variable(self.test_codes[b_start:b_end], self.args.use_gpu)
        # Axis reorder before the forward pass; presumably NHWC -> NCHW (confirm against the data files).
        batch_images = batch_images.permute(0, 3, 1, 2)
        batch_count = batch_classes.size()[0]

        output_classes = self.model(batch_images)

        # The default reduction is the mean over the batch; the deprecated size_average
        # argument is not needed for that. Weight by the batch size so that a smaller
        # final batch does not get over-represented in the overall mean.
        e = torch.nn.functional.cross_entropy(output_classes, batch_classes)
        self.loss += e.item() * batch_count

        _, indices = torch.max(torch.nn.functional.softmax(output_classes, dim=1), dim=1)
        errors = torch.abs(indices - batch_classes)
        # Count misclassified samples; normalized by the number of samples below.
        self.error += torch.sum(errors > 0).item()

        self.accuracy = common.numpy.concatenate(self.accuracy, errors.data.cpu().numpy())

    self.loss /= num_samples
    self.error /= num_samples
    log('[Testing] test loss %g; test error %g' % (self.loss, self.error))

    self.accuracy = self.accuracy == 0
    if self.args.accuracy_file:
        utils.write_hdf5(self.args.accuracy_file, self.accuracy)
        log('[Testing] wrote %s' % self.args.accuracy_file)

    # Cross-check the accuracy mask against the accumulated error.
    accuracy = numpy.sum(self.accuracy) / float(self.accuracy.shape[0])
    if numpy.abs(1 - accuracy - self.error) < 1e-4:
        log('[Testing] accuracy file is with %g accuracy correct' % accuracy)

    self.results = {
        'loss': self.loss,
        'error': self.error,
    }
    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Testing] wrote %s' % self.args.results_file)
def test(self):
    """
    Evaluate how perturbations and test images are classified by ``self.model``.

    First, all perturbations are classified; entries of ``self.transfer_success`` whose
    perturbation is classified as its code (error zero) are set to -1, and the result is
    reshaped to (N_samples, N_attempts), transposed and written to
    ``self.args.transfer_success_file``. Second, all test images are classified to obtain
    ``self.transfer_accuracy``, which is compared against ``self.original_accuracy``,
    combined with it by a logical and, and written to ``self.args.transfer_accuracy_file``.
    """

    self.model.eval()
    assert self.model.training is False
    assert self.perturbation_codes.shape[0] == self.perturbations.shape[0]
    assert self.test_codes.shape[0] == self.test_images.shape[0]
    assert len(self.perturbations.shape) == 4
    assert len(self.test_images.shape) == 4

    def classification_errors(inputs, codes, start, stop):
        """ Absolute difference between predicted class index and code for one batch. """

        batch_inputs = common.torch.as_variable(inputs[start:stop], self.args.use_gpu)
        batch_classes = common.torch.as_variable(codes[start:stop], self.args.use_gpu)
        batch_inputs = batch_inputs.permute(0, 3, 1, 2)

        output_classes = self.model(batch_inputs)
        probabilities = torch.nn.functional.softmax(output_classes, dim=1)
        values, indices = torch.max(probabilities, dim=1)
        return torch.abs(indices - batch_classes)

    # Part one: classify the perturbations.
    perturbations_accuracy = None
    num_perturbations = self.perturbations.shape[0]
    num_batches = int(math.ceil(num_perturbations / self.args.batch_size))

    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, num_perturbations)

        errors = classification_errors(self.perturbations, self.perturbation_codes, b_start, b_end)
        perturbations_accuracy = common.numpy.concatenate(perturbations_accuracy, errors.data.cpu().numpy())

        for n in range(errors.size(0)):
            log('[Testing] %d: original success=%d, transfer accuracy=%d' % (n, self.original_success[b_start + n], errors[n].item()))

    # A perturbation that is classified as its code did not transfer.
    self.transfer_success[perturbations_accuracy == 0] = -1
    self.transfer_success = self.transfer_success.reshape((self.N_samples, self.N_attempts))
    self.transfer_success = numpy.swapaxes(self.transfer_success, 0, 1)

    utils.makedir(os.path.dirname(self.args.transfer_success_file))
    utils.write_hdf5(self.args.transfer_success_file, self.transfer_success)
    log('[Testing] wrote %s' % self.args.transfer_success_file)

    # Part two: classify the test images.
    num_images = self.test_images.shape[0]
    num_batches = int(math.ceil(num_images / self.args.batch_size))

    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, num_images)

        errors = classification_errors(self.test_images, self.test_codes, b_start, b_end)
        self.transfer_accuracy = common.numpy.concatenate(self.transfer_accuracy, errors.data.cpu().numpy())

        if b % 100 == 0:
            log('[Testing] computing accuracy %d' % b)

    self.transfer_accuracy = self.transfer_accuracy == 0

    num_original = float(self.original_accuracy.shape[0])
    num_transfer = float(self.transfer_accuracy.shape[0])
    log('[Testing] original accuracy=%g' % (numpy.sum(self.original_accuracy)/num_original))
    log('[Testing] transfer accuracy=%g' % (numpy.sum(self.transfer_accuracy)/num_transfer))

    differing = numpy.sum(self.transfer_accuracy != self.original_accuracy)
    log('[Testing] accuracy difference=%g' % (differing/num_transfer))

    differing_first = numpy.sum(self.transfer_accuracy[:self.N_samples] != self.original_accuracy[:self.N_samples])
    log('[Testing] accuracy difference on %d samples=%g' % (self.N_samples, differing_first/float(self.N_samples)))

    # Keep only samples that both evaluations classify correctly.
    self.transfer_accuracy = numpy.logical_and(self.transfer_accuracy, self.original_accuracy)

    utils.makedir(os.path.dirname(self.args.transfer_accuracy_file))
    utils.write_hdf5(self.args.transfer_accuracy_file, self.transfer_accuracy)
    log('[Testing] wrote %s' % self.args.transfer_accuracy_file)
def attack(self):
    """
    Run the attack on the test samples and write perturbations and success indicators.

    If perturbation and success files already exist, the run is resumed:

    * at least as many attempts and samples as requested were found: nothing is computed;
    * the requested number of attempts was found but fewer samples: only the missing samples
      are attacked and appended along the sample axis;
    * the requested number of samples was found but fewer attempts: only the missing
      attempts are computed and appended along the attempt axis;
    * otherwise everything is recomputed.

    Note that resuming modifies ``self.args.max_samples`` / ``self.args.max_attempts`` and,
    when computing more samples, ``self.test_images`` / ``self.test_codes`` in place.

    Results are stored in ``self.perturbations`` (attempts x samples x N_theta) and
    ``self.success`` (attempts x samples, initialized to -1) and written to
    ``self.args.perturbations_file`` and ``self.args.success_file``.
    """

    assert self.model is not None
    assert self.model.classifier.training is False

    # Axis along which previous results are concatenated; negative means "no previous results used".
    concatenate_axis = -1
    if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
        self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
        assert len(self.original_perturbations.shape) == 3
        log('[Attack] read %s' % self.args.perturbations_file)

        self.original_success = utils.read_hdf5(self.args.success_file)
        log('[Attack] read %s' % self.args.success_file)

        assert self.original_perturbations.shape[0] == self.original_success.shape[0]
        assert self.original_perturbations.shape[1] == self.original_success.shape[1]

        found_attempts = self.original_perturbations.shape[0]
        found_samples = self.original_perturbations.shape[1]

        # Stop only if the existing results already cover the request. The comparison used
        # to be "<=", which returned early when *fewer* results than requested existed and
        # led to negative counts in the branches below.
        if found_samples >= self.args.max_samples and found_attempts >= self.args.max_attempts:
            log('[Attack] found %d attempts, %d samples, requested no more' % (found_attempts, found_samples))
            return
        elif found_attempts == self.args.max_attempts:
            # All attempts are there, but samples are missing: attack the remaining samples only.
            self.test_images = self.test_images[found_samples:]
            self.test_codes = self.test_codes[found_samples:]
            self.args.max_samples = self.args.max_samples - found_samples
            concatenate_axis = 1
            log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (
                found_attempts, found_samples, self.args.max_samples))
        elif found_samples == self.args.max_samples:
            # All samples are there, but attempts are missing: compute the remaining attempts only.
            self.args.max_attempts = self.args.max_attempts - found_attempts
            concatenate_axis = 0
            log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (
                found_attempts, found_samples, self.args.max_attempts))

    self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.args.N_theta))
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

    # Only attacks with 'Batch' in their name are run on more than one sample at a time.
    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)
    else:
        batch_size = 1

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples / float(batch_size)))

    for i in range(num_batches):
        if i * batch_size == self.args.max_samples:
            break

        i_start = i * batch_size
        i_end = min((i + 1) * batch_size, self.args.max_samples)

        batch_classes = common.torch.as_variable(self.test_codes[i_start:i_end], self.args.use_gpu)
        batch_theta = common.torch.as_variable(
            numpy.zeros((i_end - i_start, self.args.N_theta), dtype=numpy.float32), self.args.use_gpu)
        if self.args.N_theta > 4:
            # NOTE(review): the fifth entry of theta starts at one instead of zero; presumably
            # its neutral value (e.g. a scale) -- confirm against the decoder.
            batch_theta[:, 4] = 1

        batch_images = common.torch.as_variable(self.test_images[i_start:i_end], self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)
        self.model.decoder.set_image(batch_images)

        for t in range(self.args.max_attempts):
            attack = self.setup_attack(batch_theta, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            # NaN check: NaN is the only value that differs from itself.
            assert not numpy.any(perturbations != perturbations), perturbations

            # Hack for when only a one dimensional latent space is used!
            perturbations = perturbations.reshape(batch_theta.size())

            # Note that the perturbed theta is saved, not only the perturbation!
            self.perturbations[t][i_start:i_end] = perturbations + batch_theta.cpu().detach().numpy()
            self.success[t][i_start:i_end] = success

        log('[Attack] %d: completed' % i)

    if concatenate_axis >= 0:
        if self.perturbations.shape[0] == self.args.max_attempts:
            self.perturbations = numpy.concatenate(
                (self.original_perturbations, self.perturbations), axis=concatenate_axis)
            self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
            log('[Attack] concatenated')

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)
    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)
def test_test(self):
    """
    Encode and decode the test set, then store and summarize the results.

    The test images are pushed through ``self.encoder`` and ``self.decoder``
    batch by batch. Reconstruction error, code mean and code variance are
    accumulated on ``self``; reconstructions and codes are appended to
    ``self.pred_images`` and ``self.pred_codes`` and written to the configured
    HDF5 files when those are set. Finally, the smallest threshold (searched
    in steps of 0.1) that covers at least 90% of the codes in max-norm is
    logged and, if a display is available, pairwise scatter plots are written.
    """

    batch_size = self.args.batch_size
    num_test = self.test_images.shape[0]
    num_batches = int(math.ceil(num_test / batch_size))

    for b in range(num_batches):
        lower = b * batch_size
        upper = min(lower + batch_size, num_test)

        images = common.torch.as_variable(self.test_images[lower:upper], self.args.use_gpu)
        # Move the last axis to position 1 before encoding;
        # important to get the correct codes!
        images = images.permute(0, 3, 1, 2)

        codes, _ = self.encoder(images)
        reconstructions = self.decoder(codes)

        batch_error = self.reconstruction_loss(images, reconstructions)
        self.reconstruction_error += batch_error.data
        self.code_mean += torch.mean(codes).item()
        self.code_var += torch.var(codes).item()

        # Back to the layout of the stored images (axis 1 moved to the end).
        reconstructions = reconstructions.cpu().detach().numpy()
        reconstructions = numpy.squeeze(numpy.transpose(reconstructions, (0, 2, 3, 1)))
        self.pred_images = common.numpy.concatenate(self.pred_images, reconstructions)

        codes = codes.cpu().detach().numpy()
        self.pred_codes = common.numpy.concatenate(self.pred_codes, codes)

        if b % 100 == 50:
            log('[Testing] %d' % b)

    assert self.pred_images.shape[0] == num_test, 'computed invalid number of test images'

    reconstruction_file = self.args.reconstruction_file
    if reconstruction_file:
        utils.write_hdf5(reconstruction_file, self.pred_images)
        log('[Testing] wrote %s' % reconstruction_file)

    theta_file = self.args.test_theta_file
    if theta_file:
        assert self.pred_codes.shape[0] == self.test_images.shape[0], 'computed invalid number of test codes'
        utils.write_hdf5(theta_file, self.pred_codes)
        log('[Testing] wrote %s' % theta_file)

    # Max-norm of every code; the threshold grows in steps of 0.1 until at
    # least 90% of the codes lie within it.
    values = numpy.max(numpy.abs(self.pred_codes), axis=1)
    num_values = float(values.shape[0])
    threshold = 0.9
    percentage = 0
    while percentage < 0.9:
        threshold += 0.1
        percentage = numpy.sum(values <= threshold) / num_values
        log('[Testing] threshold %g percentage %g' % (threshold, percentage))
    log('[Testing] taking threshold %g with percentage %g' % (threshold, percentage))

    if self.args.output_directory and utils.display():
        # One scatter plot per code dimension d >= 1, always against dimension 0.
        for d in range(1, self.pred_codes.shape[1]):
            plot_file = os.path.join(self.args.output_directory, 'test_codes_%s' % d)
            within_threshold = (values <= threshold).astype(int)
            labels = ['greater %g' % threshold, 'smaller %g' % threshold]
            plot.scatter(plot_file, self.pred_codes[:, 0], self.pred_codes[:, d],
                         within_threshold, labels,
                         title='Dimensions 0 and %d of Test Codes' % d)
            log('[Testing] wrote %s' % plot_file)

    self.reconstruction_error /= num_batches
    log('[Testing] reconstruction error %g' % self.reconstruction_error)
def loop(self):
    """
    Main loop for training and testing, saving checkpoints and statistics.

    If encoder, decoder and classifier state files all exist, the three
    models, their Adam optimizers and schedulers are restored and training
    resumes at the epoch following the stored one. Otherwise the models are
    moved to the GPU (if requested) and fresh schedulers are initialized.

    Each epoch runs ``self.test`` followed by ``self.train``, writes
    per-epoch checkpoints (state file name suffixed with the epoch) and the
    training/testing statistics. After the last epoch a final test is run,
    the final checkpoints are written to the plain state files and the
    results are pickled if ``self.args.results_file`` is set.
    """

    auto_encoder_params = {
        'lr': self.args.base_lr,
        'lr_decay': self.args.base_lr_decay,
        'lr_min': 0.000000001,
        'weight_decay': self.args.weight_decay
    }

    classifier_params = {
        'lr': self.args.base_lr,
        'lr_decay': self.args.base_lr_decay,
        'lr_min': 0.000000001,
        'weight_decay': self.args.weight_decay
    }

    e = 0
    if os.path.exists(self.args.encoder_file) \
            and os.path.exists(self.args.decoder_file) \
            and os.path.exists(self.args.classifier_file):
        state = State.load(self.args.encoder_file)
        log('[Training] loaded %s' % self.args.encoder_file)

        self.encoder.load_state_dict(state.model)
        log('[Training] loaded encoder')

        if self.args.use_gpu and not cuda.is_cuda(self.encoder):
            self.encoder = self.encoder.cuda()

        # The optimizer is created after the (optional) move to the GPU and
        # only then receives its stored state.
        optimizer = torch.optim.Adam(list(self.encoder.parameters()), auto_encoder_params['lr'])
        optimizer.load_state_dict(state.optimizer)
        self.encoder_scheduler = ADAMScheduler(optimizer, **auto_encoder_params)

        state = State.load(self.args.decoder_file)
        log('[Training] loaded %s' % self.args.decoder_file)

        self.decoder.load_state_dict(state.model)
        log('[Training] loaded decoder')

        if self.args.use_gpu and not cuda.is_cuda(self.decoder):
            self.decoder = self.decoder.cuda()

        optimizer = torch.optim.Adam(list(self.decoder.parameters()), auto_encoder_params['lr'])
        optimizer.load_state_dict(state.optimizer)
        self.decoder_scheduler = ADAMScheduler(optimizer, **auto_encoder_params)

        state = State.load(self.args.classifier_file)
        log('[Training] loaded %s' % self.args.classifier_file)

        self.classifier.load_state_dict(state.model)
        # Fixed: this message previously claimed the decoder had been loaded.
        log('[Training] loaded classifier')

        if self.args.use_gpu and not cuda.is_cuda(self.classifier):
            self.classifier = self.classifier.cuda()

        optimizer = torch.optim.Adam(list(self.classifier.parameters()), classifier_params['lr'])
        optimizer.load_state_dict(state.optimizer)
        self.classifier_scheduler = ADAMScheduler(optimizer, **classifier_params)

        # Resume after the epoch stored in the last loaded (classifier) state.
        e = state.epoch + 1
        self.encoder_scheduler.update(e)
        # Fixed: was misspelled ``udpate`` and raised AttributeError on resume.
        self.decoder_scheduler.update(e)
        self.classifier_scheduler.update(e)
    else:
        if self.args.use_gpu and not cuda.is_cuda(self.encoder):
            self.encoder = self.encoder.cuda()
        if self.args.use_gpu and not cuda.is_cuda(self.decoder):
            self.decoder = self.decoder.cuda()
        if self.args.use_gpu and not cuda.is_cuda(self.classifier):
            self.classifier = self.classifier.cuda()

        self.encoder_scheduler = ADAMScheduler(list(self.encoder.parameters()), **auto_encoder_params)
        self.encoder_scheduler.initialize()  # !

        self.decoder_scheduler = ADAMScheduler(list(self.decoder.parameters()), **auto_encoder_params)
        self.decoder_scheduler.initialize()  # !

        self.classifier_scheduler = ADAMScheduler(list(self.classifier.parameters()), **classifier_params)
        self.classifier_scheduler.initialize()  # !

    log('[Training] model needs %gMiB' % (cuda.estimate_size(self.encoder) / (1024 * 1024)))

    while e < self.args.epochs:
        log('[Training] %s' % self.encoder_scheduler.report())
        log('[Training] %s' % self.decoder_scheduler.report())
        log('[Training] %s' % self.classifier_scheduler.report())

        # Testing runs before training in every epoch.
        testing = elapsed(functools.partial(self.test, e))
        training = elapsed(functools.partial(self.train, e))
        log('[Training] %gs training, %gs testing' % (training, testing))

        # Per-epoch checkpoints; earlier ones are kept.
        State.checkpoint(self.encoder, self.encoder_scheduler.optimizer, e, self.args.encoder_file + '.%d' % e)
        State.checkpoint(self.decoder, self.decoder_scheduler.optimizer, e, self.args.decoder_file + '.%d' % e)
        State.checkpoint(self.classifier, self.classifier_scheduler.optimizer, e, self.args.classifier_file + '.%d' % e)

        log('[Training] %d: checkpoint' % e)
        torch.cuda.empty_cache()  # necessary?

        # Save statistics and plots.
        if self.args.training_file:
            utils.write_hdf5(self.args.training_file, self.train_statistics)
            log('[Training] %d: wrote %s' % (e, self.args.training_file))
        if self.args.testing_file:
            utils.write_hdf5(self.args.testing_file, self.test_statistics)
            log('[Training] %d: wrote %s' % (e, self.args.testing_file))

        e += 1  # !

    # Final evaluation after the last training epoch.
    testing = elapsed(functools.partial(self.test, e))
    log('[Training] %gs testing' % (testing))

    # Final checkpoints go to the plain state files (no epoch suffix); these
    # are the files checked for when resuming.
    State.checkpoint(self.encoder, self.encoder_scheduler.optimizer, e, self.args.encoder_file)
    State.checkpoint(self.decoder, self.decoder_scheduler.optimizer, e, self.args.decoder_file)
    State.checkpoint(self.classifier, self.classifier_scheduler.optimizer, e, self.args.classifier_file)

    self.results = {
        'training_statistics': self.train_statistics,
        'testing_statistics': self.test_statistics,
    }
    if self.args.results_file:
        utils.write_pickle(self.args.results_file, self.results)
        log('[Training] wrote %s' % self.args.results_file)
def test(self, epoch):
    """
    Test the model.

    Evaluates encoder, decoder and classifier on the test images, appends one
    row to ``self.test_statistics`` and writes three HDF5 files: the
    reconstructions of the test images, images decoded from random codes and
    images decoded from linear interpolations between pairs of test codes.
    If a display is available, PNG mosaics are written as well.

    :param epoch: current epoch
    :type epoch: int
    """

    def decode(codes, report_progress=False):
        """Decode codes batch-wise; returns the concatenated images."""
        decoded = None
        # Batches are clamped to the number of codes being decoded (the
        # original clamped to the number of test images, which silently
        # truncated the output for small test sets).
        num_codes = codes.shape[0]
        for b in range(int(math.ceil(num_codes / self.args.batch_size))):
            b_start = b * self.args.batch_size
            b_end = min((b + 1) * self.args.batch_size, num_codes)

            batch_codes = common.torch.as_variable(codes[b_start:b_end], self.args.use_gpu)
            output_images = self.decoder(batch_codes)
            output_images = numpy.squeeze(numpy.transpose(output_images.cpu().detach().numpy(), (0, 2, 3, 1)))
            decoded = common.numpy.concatenate(decoded, output_images)

            if report_progress and b % 100 == 50:
                log('[Testing] %d' % b)
        return decoded

    self.encoder.eval()
    log('[Training] %d set encoder to eval' % epoch)
    self.decoder.eval()
    log('[Training] %d set decoder to eval' % epoch)
    self.classifier.eval()
    log('[Training] %d set classifier to eval' % epoch)

    latent_loss = 0
    reconstruction_loss = 0
    reconstruction_error = 0
    decoder_loss = 0
    discriminator_loss = 0
    mean = 0
    var = 0
    logvar = 0
    pred_images = None
    pred_codes = None

    num_batches = int(math.ceil(self.test_images.shape[0] / self.args.batch_size))
    assert self.encoder.training is False

    for b in range(num_batches):
        b_start = b * self.args.batch_size
        b_end = min((b + 1) * self.args.batch_size, self.test_images.shape[0])
        batch_images = common.torch.as_variable(self.test_images[b_start:b_end], self.args.use_gpu)
        # Move the last axis to position 1 before feeding the networks.
        batch_images = batch_images.permute(0, 3, 1, 2)

        # The decoder is fed the encoder's first output directly.
        output_mu, output_logvar = self.encoder(batch_images)
        output_images = self.decoder(output_mu)

        output_real_classes = self.classifier(batch_images)
        output_reconstructed_classes = self.classifier(output_images)

        # Latent loss.
        e = self.latent_loss(output_mu, output_logvar)
        latent_loss += e.item()

        # Reconstruction loss.
        e = self.reconstruction_loss(batch_images, output_images)
        reconstruction_loss += e.item()

        # Reconstruction error.
        e = self.reconstruction_error(batch_images, output_images)
        reconstruction_error += e.item()

        e = self.decoder_loss(output_reconstructed_classes)
        decoder_loss += e.item()

        # Adversarial loss.
        e = self.discriminator_loss(output_real_classes, output_reconstructed_classes)
        discriminator_loss += e.item()

        mean += torch.mean(output_mu).item()
        var += torch.var(output_mu).item()
        logvar += torch.mean(output_logvar).item()

        output_images = numpy.squeeze(numpy.transpose(output_images.cpu().detach().numpy(), (0, 2, 3, 1)))
        pred_images = common.numpy.concatenate(pred_images, output_images)
        output_codes = output_mu.cpu().detach().numpy()
        pred_codes = common.numpy.concatenate(pred_codes, output_codes)

    utils.write_hdf5(self.args.reconstruction_file, pred_images)
    log('[Training] %d: wrote %s' % (epoch, self.args.reconstruction_file))

    if utils.display():
        png_file = self.args.reconstruction_file + '.%d.png' % epoch
        # At epoch 0 the original test images are shown instead of the
        # reconstructions.
        if epoch == 0:
            vis.mosaic(png_file, self.test_images[:225], 15, 5, 'gray', 0, 1)
        else:
            vis.mosaic(png_file, pred_images[:225], 15, 5, 'gray', 0, 1)
        log('[Training] %d: wrote %s' % (epoch, png_file))

    # All accumulated quantities are averaged over the test batches.
    latent_loss /= num_batches
    reconstruction_loss /= num_batches
    reconstruction_error /= num_batches
    decoder_loss /= num_batches
    discriminator_loss /= num_batches
    mean /= num_batches
    var /= num_batches
    logvar /= num_batches
    log('[Training] %d: test %g (%g) %g (%g, %g, %g)' % (
        epoch, reconstruction_loss, reconstruction_error, latent_loss, mean, var, logvar))
    log('[Training] %d: test %g %g' % (epoch, decoder_loss, discriminator_loss))

    # The iteration counter is expressed in *training* batches.
    num_batches = int(math.ceil(self.train_images.shape[0] / self.args.batch_size))
    iteration = epoch * num_batches
    self.test_statistics = numpy.vstack((self.test_statistics, numpy.array([
        iteration,
        iteration * self.args.batch_size,
        min(num_batches, iteration),
        min(num_batches, iteration) * self.args.batch_size,
        reconstruction_loss,
        reconstruction_error,
        latent_loss,
        mean,
        var,
        logvar,
        decoder_loss,
        discriminator_loss
    ])))

    # The random codes are drawn once and reused in later calls.
    if self.random_codes is None:
        self.random_codes = common.numpy.truncated_normal(
            (1000, self.args.latent_space_size)).astype(numpy.float32)

    pred_images = decode(self.random_codes)
    utils.write_hdf5(self.args.random_file, pred_images)
    log('[Training] %d: wrote %s' % (epoch, self.args.random_file))

    if utils.display() and epoch > 0:
        png_file = self.args.random_file + '.%d.png' % epoch
        vis.mosaic(png_file, pred_images[:225], 15, 5, 'gray', 0, 1)
        log('[Training] %d: wrote %s' % (epoch, png_file))

    # Linear interpolations (10 steps each) between a test code and a randomly
    # chosen partner; at most 50 pairs, fewer if the test set is smaller.
    interpolations = None
    perm = numpy.random.permutation(numpy.array(range(pred_codes.shape[0])))
    for i in range(min(50, pred_codes.shape[0])):
        first = pred_codes[i]
        second = pred_codes[perm[i]]
        linfit = scipy.interpolate.interp1d([0, 1], numpy.vstack([first, second]), axis=0)
        interpolations = common.numpy.concatenate(interpolations, linfit(numpy.linspace(0, 1, 10)))

    interpolations = interpolations.astype(numpy.float32)
    pred_images = decode(interpolations, report_progress=True)

    utils.write_hdf5(self.args.interpolation_file, pred_images)
    log('[Testing] wrote %s' % self.args.interpolation_file)

    if utils.display() and epoch > 0:
        png_file = self.args.interpolation_file + '.%d.png' % epoch
        vis.mosaic(png_file, pred_images[:100], 10, 5, 'gray', 0, 1)
        log('[Training] %d: wrote %s' % (epoch, png_file))
def attack(self):
    """
    Attack the model on random images with random labels.

    For ``self.args.max_samples`` samples, random integer images with the
    shape of the test images and random class labels are drawn; each batch is
    attacked ``self.args.max_attempts`` times. The perturbed images, the
    success indicators and the probabilities returned by the attack are kept
    on ``self`` and written to the configured HDF5 files.
    """

    assert self.model is not None
    assert self.model.training is False

    # Only attacks with 'Batch' in their name process several samples at once.
    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)
    else:
        batch_size = 1

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples / batch_size))

    # ndarray.shape is a tuple; the original concatenated it to a list, which
    # raises a TypeError.
    image_shape = tuple(self.test_images.shape[1:])

    # can't squeeze here!
    if self.test_images.shape[3] > 1:
        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples,
                                          self.test_images.shape[1], self.test_images.shape[2],
                                          self.test_images.shape[3]))
    else:
        self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples,
                                          self.test_images.shape[1], self.test_images.shape[2]))
    # -1 marks samples/attempts that have not been attacked.
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1
    self.probabilities = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.N_class))

    for i in range(num_batches):
        i_start = i * batch_size
        if i_start >= self.args.max_samples:
            break
        i_end = min((i + 1) * batch_size, self.args.max_samples)

        # The batch is sized by the slice it fills, so a smaller final batch
        # still matches self.perturbations[t][i_start:i_end].
        # NOTE(review): integer images in [0, 255) are drawn as in the
        # original; confirm the dtype/scale the model expects.
        batch_images = numpy.random.randint(0, 255, size=(i_end - i_start,) + image_shape)
        batch_images = common.torch.as_variable(batch_images, self.args.use_gpu)
        batch_images = batch_images.permute(0, 3, 1, 2)

        # randint's upper bound is exclusive: N_class (not N_class - 1) is
        # needed so that the last class can be drawn as well.
        batch_classes = common.torch.as_variable(
            numpy.random.randint(0, self.N_class, size=(batch_images.size(0))), self.args.use_gpu)

        t = 0
        while t < self.args.max_attempts:
            attack = self.setup_attack(batch_images, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            # NaN is the only value not equal to itself.
            assert not numpy.any(perturbations != perturbations), perturbations

            # Note that we save the perturbed image, not only the perturbation!
            self.perturbations[t][i_start:i_end] = numpy.squeeze(
                numpy.transpose(perturbations + batch_images.cpu().numpy(), (0, 2, 3, 1)))
            self.success[t][i_start:i_end] = success
            self.probabilities[t][i_start:i_end] = probabilities

            # IMPORTANT: The adversarial examples are not considering whether the classifier is
            # actually correct to start with.

            t += 1

        log('[Attack] %d: completed' % i)

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)
    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)
    utils.write_hdf5(self.args.probabilities_file, self.probabilities)
    log('[Attack] wrote %s' % self.args.probabilities_file)
def attack(self):
    """
    Attack the model in the space of the transformation parameters.

    The font/class code of every sample is fixed on the decoder, so only
    ``self.test_theta`` is perturbed. Each batch is attacked
    ``self.args.max_attempts`` times; the perturbed thetas and the success
    indicators are written to the configured HDF5 files.

    If both result files already exist, the method returns early when they
    cover the requested attempts and samples, or computes only the missing
    samples (same number of attempts) or the missing attempts (same number
    of samples) and concatenates them to the stored results. Note that this
    modifies ``self.args.max_samples`` / ``self.args.max_attempts`` and
    slices the test data in place.
    """

    assert self.model is not None
    assert self.model.classifier.training is False

    # Axis along which new results are appended to stored ones; -1: none.
    concatenate_axis = -1
    if os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file):
        self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
        assert len(self.original_perturbations.shape) == 3, self.original_perturbations.shape
        log('[Attack] read %s' % self.args.perturbations_file)

        self.original_success = utils.read_hdf5(self.args.success_file)
        log('[Attack] read %s' % self.args.success_file)

        assert self.original_perturbations.shape[0] == self.original_success.shape[0]
        assert self.original_perturbations.shape[1] == self.original_success.shape[1]
        assert self.original_perturbations.shape[2] == self.test_theta.shape[1]

        # Fixed: the comparison was inverted (<=), which returned early when
        # *fewer* results than requested existed and otherwise let the
        # remaining sample count below go negative.
        if self.original_perturbations.shape[1] >= self.args.max_samples \
                and self.original_perturbations.shape[0] >= self.args.max_attempts:
            log('[Attack] found %d attempts, %d samples, requested no more' % (
                self.original_perturbations.shape[0], self.original_perturbations.shape[1]))
            return
        elif self.original_perturbations.shape[0] == self.args.max_attempts:
            # Same number of attempts: only attack the samples not covered yet.
            self.test_theta = self.test_theta[self.original_perturbations.shape[1]:]
            self.test_fonts = self.test_fonts[self.original_perturbations.shape[1]:]
            self.test_classes = self.test_classes[self.original_perturbations.shape[1]:]
            self.args.max_samples = self.args.max_samples - self.original_perturbations.shape[1]
            concatenate_axis = 1
            log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (
                self.original_perturbations.shape[0], self.original_perturbations.shape[1],
                self.args.max_samples))
        elif self.original_perturbations.shape[1] == self.args.max_samples:
            # Same number of samples: only run the missing attempts.
            self.args.max_attempts = self.args.max_attempts - self.original_perturbations.shape[0]
            concatenate_axis = 0
            log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (
                self.original_perturbations.shape[0], self.original_perturbations.shape[1],
                self.args.max_attempts))
        # Otherwise the stored results are ignored and everything is recomputed.

    self.perturbations = numpy.zeros((self.args.max_attempts, self.args.max_samples, self.test_theta.shape[1]))
    # -1 marks samples/attempts that have not been attacked.
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

    # Only attacks with 'Batch' in their name process several samples at once.
    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)
    else:
        batch_size = 1

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples / batch_size))

    for i in range(num_batches):
        if i * batch_size == self.args.max_samples:
            break

        i_start = i * batch_size
        i_end = min((i + 1) * batch_size, self.args.max_samples)

        batch_fonts = self.test_fonts[i_start:i_end]
        batch_classes = self.test_classes[i_start:i_end]
        # One-hot font code followed by one-hot class code.
        batch_code = numpy.concatenate((common.numpy.one_hot(batch_fonts, self.N_font),
                                        common.numpy.one_hot(batch_classes, self.N_class)),
                                       axis=1).astype(numpy.float32)

        batch_classes = common.torch.as_variable(batch_classes, self.args.use_gpu)
        batch_inputs = common.torch.as_variable(self.test_theta[i_start:i_end], self.args.use_gpu)
        batch_code = common.torch.as_variable(batch_code, self.args.use_gpu)

        # This basically allows to only optimize over theta, keeping the font/class code fixed.
        self.model.decoder.set_code(batch_code)

        t = 0
        while t < self.args.max_attempts:
            attack = self.setup_attack(batch_inputs, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            # NaN is the only value not equal to itself.
            assert not numpy.any(perturbations != perturbations), perturbations

            # Note that we save the perturbed image, not only the perturbation!
            # The reshape is a hack for when only a one dimensional latent space is used.
            perturbations = perturbations.reshape(batch_inputs.size())
            self.perturbations[t][i_start:i_end] = perturbations + batch_inputs.cpu().numpy()
            self.success[t][i_start:i_end] = success

            t += 1

        log('[Attack] %d: completed' % i)

    if concatenate_axis >= 0:
        if self.perturbations.shape[0] == self.args.max_attempts:
            self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations),
                                                   axis=concatenate_axis)
            self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
            log('[Attack] concatenated')

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)
    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)
def main(self):
    """
    Generate the codes and transformation parameters for a font database.

    Reads the database, enumerates every (font, class) pair, repeats that
    enumeration ``self.args.multiplier`` times and draws uniformly random
    transformation parameters for every resulting sample. Codes and
    parameters are written to ``self.args.codes_file`` and
    ``self.args.theta_file``.
    """

    database = utils.read_hdf5(self.args.database_file)
    log('[Data] read %s' % self.args.database_file)

    # one-hot size of code
    N_fonts, N_classes = database.shape[0], database.shape[1]
    N = N_fonts * N_classes

    #
    # Fonts and codes are laid out as follows (example for 10 classes):
    #
    # font  class
    # 0     0
    # 0     1
    # ...
    # 0     9
    # 1     0
    # ...
    # 1     9
    #
    # This scheme is then repeated according to the multiplier, so that a
    # balanced subset can be selected in multiples of 10.
    #
    font_ids = numpy.repeat(numpy.array(range(N_fonts)), N_classes, axis=0)
    class_ids = numpy.tile(numpy.array(range(N_classes)), N_fonts)
    sample_ids = numpy.arange(N)
    # Columns: running index, font, class.
    codes = numpy.concatenate((sample_ids[:, numpy.newaxis],
                               font_ids[:, numpy.newaxis],
                               class_ids[:, numpy.newaxis]), axis=1)
    codes = numpy.tile(codes, (self.args.multiplier, 1))

    N_theta = self.args.number_transformations
    theta = numpy.zeros((self.args.multiplier * N, N_theta))
    assert N_theta > 0

    # Names of the argument attributes bounding each theta column, in column
    # order; None as upper bound stands for the constant 1. Attributes are
    # looked up only for the columns that are actually sampled.
    bounds = (
        ('min_translation', 'max_translation'),  # translation x
        ('min_translation', 'max_translation'),  # translation y
        ('min_shear', 'max_shear'),              # shear x
        ('min_shear', 'max_shear'),              # shear y
        ('min_scale', 'max_scale'),              # scale
        ('min_rotation', 'max_rotation'),        # rotation
        ('min_color', None),
        ('min_color', None),
        ('min_color', None),
    )
    # Columns beyond the ninth, if any, stay zero.
    for column, (low_name, high_name) in enumerate(bounds):
        if not N_theta > column:
            continue
        low = getattr(self.args, low_name)
        high = 1 if high_name is None else getattr(self.args, high_name)
        theta[:, column] = numpy.random.uniform(low, high, size=(self.args.multiplier * N))

    utils.write_hdf5(self.args.codes_file, codes)
    log('[Data] wrote %s' % self.args.codes_file)
    utils.write_hdf5(self.args.theta_file, theta)
    log('[Data] wrote %s' % self.args.theta_file)
def attack(self):
    """
    Attack the model on the test images.

    Each batch of test images is attacked ``self.args.max_attempts`` times;
    the perturbed images and the success indicators are written to the
    configured HDF5 files. If both files already exist, the method returns
    early when they cover the requested attempts and samples, or computes
    only the missing samples / missing attempts and concatenates them to the
    stored results (modifying ``self.args`` and the test data on the way).
    """

    assert self.model is not None
    assert self.model.training is False
    assert self.test_images.shape[0] == self.test_codes.shape[0], 'number of samples has to match'

    # Single-channel images are stored without the channel axis.
    multi_channel = self.test_images.shape[3] > 1

    # Axis along which new results are appended to stored ones; -1: none.
    concatenate_axis = -1
    have_results = os.path.exists(self.args.perturbations_file) and os.path.exists(self.args.success_file)
    if have_results:
        self.original_perturbations = utils.read_hdf5(self.args.perturbations_file)
        if multi_channel:
            assert len(self.original_perturbations.shape) == 5
        else:
            assert len(self.original_perturbations.shape) == 4
        log('[Attack] read %s' % self.args.perturbations_file)

        self.original_success = utils.read_hdf5(self.args.success_file)
        log('[Attack] read %s' % self.args.success_file)

        found_attempts = self.original_perturbations.shape[0]
        found_samples = self.original_perturbations.shape[1]
        assert found_attempts == self.original_success.shape[0]
        assert found_samples == self.original_success.shape[1]
        assert self.original_perturbations.shape[2] == self.test_images.shape[1]
        assert self.original_perturbations.shape[3] == self.test_images.shape[2]

        if found_samples >= self.args.max_samples and found_attempts >= self.args.max_attempts:
            log('[Attack] found %d attempts, %d samples, requested no more' % (found_attempts, found_samples))
            return

        if found_attempts == self.args.max_attempts:
            # Same number of attempts: only attack the samples not covered yet.
            self.test_images = self.test_images[found_samples:]
            self.test_codes = self.test_codes[found_samples:]
            self.args.max_samples = self.args.max_samples - found_samples
            concatenate_axis = 1
            log('[Attack] found %d attempts with %d perturbations, computing %d more perturbations' % (found_attempts, found_samples, self.args.max_samples))
        elif found_samples == self.args.max_samples:
            # Same number of samples: only run the missing attempts.
            self.args.max_attempts = self.args.max_attempts - found_attempts
            concatenate_axis = 0
            log('[Attack] found %d attempts with %d perturbations, computing %d more attempts' % (found_attempts, found_samples, self.args.max_attempts))

    # can't squeeze here!
    image_axes = 3 if multi_channel else 2
    result_shape = (self.args.max_attempts, self.args.max_samples)
    result_shape += tuple(self.test_images.shape[1:1 + image_axes])
    self.perturbations = numpy.zeros(result_shape)
    # -1 marks samples/attempts that have not been attacked.
    self.success = numpy.ones((self.args.max_attempts, self.args.max_samples), dtype=int) * -1

    # Only attacks with 'Batch' in their name process several samples at once.
    batch_size = 1
    if self.args.attack.find('Batch') >= 0:
        batch_size = min(self.args.batch_size, self.args.max_samples)

    objective = self.objective_class()
    num_batches = int(math.ceil(self.args.max_samples/batch_size))

    for i in range(num_batches):
        i_start = i*batch_size
        if i_start == self.args.max_samples:
            break
        i_end = min(i_start + batch_size, self.args.max_samples)
        window = slice(i_start, i_end)

        batch_images = common.torch.as_variable(self.test_images[window], self.args.use_gpu)
        batch_classes = common.torch.as_variable(numpy.array(self.test_codes[window]), self.args.use_gpu)
        # Move the last axis to position 1 before attacking.
        batch_images = batch_images.permute(0, 3, 1, 2)

        for t in range(self.args.max_attempts):
            attack = self.setup_attack(batch_images, batch_classes)
            success, perturbations, probabilities, norm, _ = attack.run(objective)
            # NaN is the only value not equal to itself.
            assert not numpy.any(perturbations != perturbations), perturbations

            # Note that we save the perturbed image, not only the perturbation!
            perturbed = perturbations + batch_images.cpu().numpy()
            perturbed = numpy.transpose(perturbed, (0, 2, 3, 1))
            self.perturbations[t][window] = numpy.squeeze(perturbed)
            self.success[t][window] = success

            # IMPORTANT: The adversarial examples are not considering whether the classifier is
            # actually correct to start with.

        log('[Attack] %d: completed' % i)

    if concatenate_axis >= 0 and self.perturbations.shape[0] == self.args.max_attempts:
        self.perturbations = numpy.concatenate((self.original_perturbations, self.perturbations), axis=concatenate_axis)
        self.success = numpy.concatenate((self.original_success, self.success), axis=concatenate_axis)
        log('[Attack] concatenated')

    utils.write_hdf5(self.args.perturbations_file, self.perturbations)
    log('[Attack] wrote %s' % self.args.perturbations_file)
    utils.write_hdf5(self.args.success_file, self.success)
    log('[Attack] wrote %s' % self.args.success_file)