def _sticker_offsets(extent, patch, step):
    """Return the start offsets at which a patch of size *patch* fits in *extent*."""
    return range(0, extent - patch + 1, step)


def cleverhans_spsa(model, data_iter, attack_size, stride=None):
    """Run a cleverhans SPSA sticker attack at a grid of locations on each image.

    For every ``(image, label)`` pair produced by ``data_iter`` a sticker of
    size ``attack_size`` is placed at each grid location ``(x, y)``. For each
    location a wrapper Keras model is built whose only input is the sticker
    content, SPSA is run against it, and the top-5 predictions of the wrapped
    model on the adversarial sticker are printed.

    Args:
        model: Callable applied to the output of ``ApplyStickerLayer`` to
            produce logits.
        data_iter: Iterator (or iterable) yielding ``(image, label)`` pairs.
        attack_size: Pair ``(size_x, size_y)`` giving the sticker size.
        stride: Step between sticker locations, either an int or a pair.
            Defaults to ``attack_size`` (non-overlapping tiles).

    Returns:
        None. Results are reported via ``print``.

    NOTE(review): the location grid is derived from the last two dimensions
    of ``image``, assuming a channel-first layout in which the first spatial
    dimension pairs with ``attack_size[0]`` (this matches the ``Input`` shape
    used below) -- confirm against ``get_subimgs`` / ``ApplyStickerLayer``.
    """
    if stride is None:
        stride = attack_size
    elif isinstance(stride, int):
        stride = (stride, stride)

    # enumerate() gives a running image index; iteration stops cleanly when
    # the data source is exhausted.
    for count, (image, label) in enumerate(data_iter):
        label = np.array([label])
        print('Start attacking image {}'.format(count))

        extent_x, extent_y = image.shape[-2:]
        for x in _sticker_offsets(extent_x, attack_size[0], stride[0]):
            for y in _sticker_offsets(extent_y, attack_size[1], stride[1]):
                print("location {}".format((x, y)))
                subimg = get_subimgs(image, (x, y), attack_size)

                # Build a model whose only input is the sticker; the rest of
                # the image is baked into ApplyStickerLayer.
                tic = time.time()
                subimg_op = Input(shape=(3, attack_size[0], attack_size[1]))
                adv_img_op = ApplyStickerLayer(image, attack_size, (x, y))(subimg_op)
                wrapped_logits_op = model(adv_img_op)
                wrapped_model = Model(inputs=subimg_op, outputs=wrapped_logits_op)
                tac = time.time()
                print('{}s to build graph for attack'.format(tac - tic))

                wrapper = CallableModelWrapper(wrapped_model, "logits")
                wrapper.nb_classes = 1000
                attack = SPSA(wrapper, sess=keras.backend.get_session())
                spsa_params = {
                    'eps': 2.5,
                    'clip_min': -2.3,
                    'clip_max': 2.7,
                    'nb_iter': 40,
                    'y': label.astype(np.int32),
                }

                print('Start attacking...')
                tic = time.time()
                adv = attack.generate_np(subimg, **spsa_params)
                tac = time.time()
                print("Attack Time: {}s".format(tac - tic))

                # Evaluate adversarial sticker
                adv_logits = wrapped_model.predict(adv)
                print("Adversarial image: top-5 prediction: {}, label: {}".
                      format(np.argsort(adv_logits, axis=1)[:, -5:], label))
def spsa_attack():
    """Evaluate the module-level ``model`` under a black-box SPSA attack.

    Reads the module-level ``model``, ``args`` (``eps``, ``ns``,
    ``spsa_samples``) and ``test_loader``. The PyTorch model is converted to
    a TF callable and wrapped for cleverhans; adversarial examples are
    generated batch by batch and classified by the original PyTorch model on
    CUDA. A running accuracy is written to stdout and the final accuracy is
    printed.

    Returns:
        None.
    """
    # Use tf for evaluation on adversarial data
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    correct = 0
    total = 0
    # The context manager closes the session even if the attack raises.
    with tf.Session(config=tf_config) as sess:
        # Convert pytorch model to a tf_model and wrap it in cleverhans
        tf_model_fn = convert_pytorch_model_to_tf(model, out_dims=10)
        cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                output_layer='logits')
        cleverhans_model.nb_classes = 10

        # Create an SPSA attack
        spsa = SPSA(cleverhans_model, sess=sess)
        spsa_params = {
            'eps': args.eps,
            'nb_iter': args.ns,
            'clip_min': 0.,
            'clip_max': 1.,
            # in this case, the batch_size is equal to spsa_samples
            'spsa_samples': args.spsa_samples,
            'spsa_iters': 1,
            'early_stop_loss_threshold': 0,
        }

        # Evaluation against SPSA attacks
        for inputs, targets in test_loader:
            advs = spsa.generate_np(inputs.numpy(),
                                    y=targets.numpy().astype(np.int32),
                                    **spsa_params)
            with torch.no_grad():
                logits = model(torch.tensor(advs).cuda())
            # topk(1)[1] has shape (N, 1); squeeze it to (N,) so that eq()
            # compares element-wise instead of broadcasting to (N, N).
            predictions = logits.topk(1)[1].squeeze(1).cpu()
            correct += predictions.eq(targets).sum().item()
            total += len(inputs)

            if total:
                sys.stdout.write(
                    "\rBlack-box SPSA attack... Acc: %.3f%% (%d/%d)" %
                    (100. * correct / total, correct, total))
                sys.stdout.flush()

    # An empty loader yields no samples; report NaN rather than dividing by 0.
    accuracy = 100. * correct / total if total else float('nan')
    print('Accuracy under SPSA attack: %.3f%%' % accuracy)
def evaluate_model(self, model_path, num_batches=-1, model_device=None):
    """Measure per-batch accuracy of a saved PyTorch model under ``self.attack``.

    The model at ``model_path`` is loaded, converted to a TF callable and
    wrapped for cleverhans. For every batch of ``self.test_data`` adversarial
    examples are generated with ``self.params`` (the ``'y'`` entry is
    overwritten with the batch labels) and the model's predictions on both
    adversarial and clean inputs are recorded in the ``self.saved_*`` lists.
    ``self._process_saved_info()`` is called once the loop ends, including
    after a ``KeyboardInterrupt``.

    Args:
        model_path: Path passed to ``torch.load``.
        num_batches: Stop after this many batches; values <= 0 mean "all".
        model_device: Device the model runs on; falls back to the
            module-level ``device`` when falsy.

    Returns:
        List with one accuracy value (fraction correct under attack) per
        evaluated batch.
    """
    model_device = model_device or device
    start_sec = time()
    with tf.Session() as sess:
        # NOTE(review): torch.load unpickles the file; only use it on
        # checkpoints from a trusted source.
        torch_model_orig = torch.load(
            model_path, map_location=torch.device('cpu')).to(model_device)

        def torch_model(x):
            # [0]: convert to standard format
            return torch_model_orig(x.to(model_device))[0].to(device)

        tf_model_fn = convert_pytorch_model_to_tf(torch_model, out_dims=10)
        cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                output_layer='logits')
        # fix error with SPSA: "ValueError: Tried to convert 'depth' to a
        # tensor and failed. Error: None values not supported."
        cleverhans_model.nb_classes = 10

        # important to shuffle the data since we'll measure standard deviation
        test_loader = torch.utils.data.DataLoader(
            self.test_data, batch_size=self.batch_size, shuffle=True)

        # Peek at one batch to get the shape of the input
        x_test_sample, _ = next(iter(test_loader))
        nchannels, img_rows, img_cols = x_test_sample.shape[1:]
        x = tf.placeholder(tf.float32,
                           shape=(None, nchannels, img_rows, img_cols))

        attack_model = self.attack(cleverhans_model, sess=sess)
        preds_op = tf_model_fn(x)

        self.saved_xs, self.saved_advs, self.saved_ys, \
            self.saved_adv_preds, self.saved_clean_preds = [], [], [], [], []
        accuracies = []
        try:
            for batch_no, (xs, ys) in enumerate(test_loader):
                # Label format depends on the attack: SPSA takes integer
                # class ids, the others take one-hot labels.
                if self.attack is SPSA:
                    self.params['y'] = ys.numpy().astype(np.int32)
                else:
                    ys_one_hot = torch.nn.functional.one_hot(ys, 10).numpy()
                    if self.attack is MaxConfidence:
                        self.params['y'] = ys_one_hot.astype(np.float32)
                    else:
                        self.params['y'] = ys_one_hot.astype(np.int32)

                xs_np = xs.numpy()
                advs = attack_model.generate_np(xs_np, **self.params)
                adv_preds = sess.run(preds_op, feed_dict={x: advs})
                clean_preds = sess.run(preds_op, feed_dict={x: xs_np})

                correct = (np.argmax(adv_preds, axis=1) == ys.numpy()).sum()
                # Use the real batch length: the last batch may be smaller
                # than the loader's nominal batch_size.
                total = len(ys)

                self.saved_xs.append(xs)
                self.saved_ys.append(ys)
                self.saved_advs.append(advs)
                self.saved_adv_preds.append(adv_preds)
                self.saved_clean_preds.append(clean_preds)
                accuracies.append(correct / total)

                if self.report_interval > 0 and batch_no % self.report_interval == 0:
                    elapsed_sec = time() - start_sec
                    print(
                        '%s: Batch: #%d, accuracy: %.2f, std: %.2f, %.1f secs/batch'
                        % (self.attack_name, batch_no, np.mean(accuracies),
                           np.std(accuracies), elapsed_sec / (batch_no + 1)),
                        file=sys.stderr)

                if num_batches > 0 and batch_no + 1 >= num_batches:
                    break
        except KeyboardInterrupt:
            # Keep whatever was collected so far and fall through to the
            # summary below.
            print('Evaluation aborted', file=sys.stderr)

        self._process_saved_info()
        print('%s: Accuracy under attack: %.2f (std=%.2f)' %
              (self.attack_name, np.mean(accuracies), np.std(accuracies)),
              file=sys.stderr)
        return accuracies
# Script fragment: sets up an SPSA attack (cleverhans, TF1 session) against a
# PyTorch ``model`` on a subset of the ImageNet validation split, one image
# per batch.
imagenet_val = datasets.ImageNet("/mnt/data/imagenet", split='val', download=False, transform=imagenet_transform)
# NOTE(review): presumably image_partition(seed, n) returns index partitions
# and [0] selects the first one -- confirm against its definition.
val_subset_indices = image_partition(42, 100)[0]
val_subset_loader = torch.utils.data.DataLoader(imagenet_val, batch_size=1, num_workers=4, sampler=torch.utils.data.sampler.SubsetRandomSampler(val_subset_indices))

# We use tf for evaluation on adversarial data
sess = tf.Session()
# Placeholder for a batch of 3 x 224 x 224 inputs.
x_op = tf.placeholder(tf.float32, shape=(None, 3, 224, 224,))

# Convert pytorch model to a tf_model and wrap it in cleverhans
tf_model_fn = convert_pytorch_model_to_tf(model)
cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')
# nb_classes is set by hand on the wrapper before it is handed to SPSA.
cleverhans_model.nb_classes = 1000

spsa_op = SPSA(cleverhans_model, sess=sess)
# 'y' is a placeholder entry here; it is filled in per image inside the loop.
# NOTE(review): clip_min / clip_max presumably bound the normalized pixel
# range produced by imagenet_transform -- confirm.
spsa_params = {'eps': 2.5, 'clip_min': -2.3, 'clip_max': 2.8, 'nb_iter': 40, 'y': None}

correct = 0
count = 0
for xs, ys in val_subset_loader:
    count += 1
    ys = ys.numpy().astype(np.int32)
    # Pass the current image's label to the SPSA attack parameters
    spsa_params['y'] = ys