Example #1
0
def cleverhans_spsa(model, data_iter, attack_size):
    """Run an SPSA sticker attack on every image yielded by ``data_iter``.

    For each image, slides an ``attack_size`` window over all spatial
    locations, builds a wrapped Keras model that pastes the adversarial
    sticker at that location, and attacks the sticker pixels with SPSA.

    Args:
        model: Keras model returning logits when called on an image tensor.
        data_iter: iterator yielding ``(image, label)`` pairs; ``image`` is
            assumed to be channels-first with shape (1, 3, H, W) -- TODO
            confirm against the caller.
        attack_size: ``(height, width)`` of the adversarial sticker.
    """
    # BUG FIX: `count` was reset to 0 inside the loop and never incremented,
    # so the progress message always reported "image 0".
    count = 0
    while True:
        try:
            image, label = next(data_iter)
            label = np.array([label])
        except StopIteration:
            break
        print('Start attacking image {}'.format(count))
        count += 1
        # BUG FIX: range() was called with no arguments (TypeError at
        # runtime). Iterate over every valid top-left sticker position on
        # the (H, W) plane. Assumes image.shape == (1, 3, H, W) -- TODO
        # confirm with the data pipeline.
        for x in range(image.shape[2] - attack_size[0] + 1):
            for y in range(image.shape[3] - attack_size[1] + 1):
                print("location {}".format((x, y)))
                subimg = get_subimgs(image, (x, y), attack_size)
                # Build model: sticker pixels -> stickered image -> logits
                tic = time.time()
                subimg_op = Input(shape=(3, attack_size[0], attack_size[1]))
                adv_img_op = ApplyStickerLayer(image, attack_size,
                                               (x, y))(subimg_op)
                wrapped_logits_op = model(adv_img_op)
                wrapped_model = Model(inputs=subimg_op,
                                      outputs=wrapped_logits_op)
                tac = time.time()
                print('{}s to build graph for attack'.format(tac - tic))
                wrapper = CallableModelWrapper(wrapped_model, "logits")
                # SPSA requires nb_classes; CallableModelWrapper does not
                # set it on its own.
                wrapper.nb_classes = 1000
                attack = SPSA(wrapper, sess=keras.backend.get_session())
                spsa_params = {
                    'eps': 2.5,
                    'clip_min': -2.3,
                    'clip_max': 2.7,
                    'nb_iter': 40,
                    'y': label.astype(np.int32)
                }
                print('Start attacking...')
                tic = time.time()
                adv = attack.generate_np(subimg, **spsa_params)
                tac = time.time()
                print("Attack Time: {}s".format(tac - tic))

                # Evaluate adversarial sticker
                adv_logits = wrapped_model.predict(adv)
                print("Adversarial image: top-5 prediction: {}, label: {}".
                      format(np.argsort(adv_logits, axis=1)[:, -5:], label))
Example #2
0
def spsa_attack():
    """Evaluate the global ``model`` against a black-box SPSA attack.

    Converts the PyTorch ``model`` to a TF graph via cleverhans, generates
    SPSA adversarial examples for every batch of ``test_loader``, and
    prints the accuracy on those adversarial inputs.

    Relies on module globals: ``model``, ``args``, ``test_loader``.
    """
    # Use tf for evaluation on adversarial data
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(model, out_dims=10)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')
    # SPSA requires nb_classes; CallableModelWrapper does not set it itself.
    cleverhans_model.nb_classes = 10

    # Create an SPSA attack
    spsa = SPSA(cleverhans_model, sess=sess)
    spsa_params = {
        'eps': args.eps,
        'nb_iter': args.ns,
        'clip_min': 0.,
        'clip_max': 1.,
        # in this case, the batch_size is equal to spsa_samples
        'spsa_samples': args.spsa_samples,
        'spsa_iters': 1,
        'early_stop_loss_threshold': 0
    }

    # Evaluation against SPSA attacks
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        advs = spsa.generate_np(inputs.numpy(),
                                y=targets.numpy().astype(np.int32),
                                **spsa_params)
        with torch.no_grad():
            # BUG FIX: topk(1)[1] has shape (N, 1); comparing it directly
            # against the (N,) targets broadcasts to an (N, N) matrix and
            # over-counts correct predictions. Squeeze to (N,) first.
            preds = model(torch.tensor(advs).cuda()).topk(1)[1].squeeze(1)
            correct += preds.cpu().eq(targets).sum().item()
        total += len(inputs)

        sys.stdout.write("\rBlack-box SPSA attack... Acc: %.3f%% (%d/%d)" %
                         (100. * correct / total, correct, total))
        sys.stdout.flush()

    print('Accuracy under SPSA attack: %.3f%%' % (100. * correct / total))
Example #3
0
    def evaluate_model(self, model_path, num_batches=-1, model_device=None):
        """Measure accuracy of the model at ``model_path`` under ``self.attack``.

        Loads a torch model, wraps it for cleverhans, attacks each batch of
        the (shuffled) test set, and records inputs, adversarial examples,
        labels and predictions on ``self`` for later inspection.

        Args:
            model_path: path to a ``torch.save``-d model to load and evaluate.
            num_batches: stop after this many batches (-1 = whole test set).
            model_device: device the model runs on (defaults to the module
                global ``device``).

        Returns:
            List of per-batch accuracies under attack.
        """
        model_device = model_device or device
        start_sec = time()
        with tf.Session() as sess:
            torch_model_orig = torch.load(
                model_path, map_location=torch.device('cpu')).to(model_device)
            # [0]: convert the model's output tuple to the standard format;
            # inputs move to the model's device, outputs back to `device`.
            torch_model = lambda x: torch_model_orig(x.to(model_device))[0].to(
                device)
            tf_model_fn = convert_pytorch_model_to_tf(torch_model, out_dims=10)
            cleverhans_model = CallableModelWrapper(tf_model_fn,
                                                    output_layer='logits')
            # fix error with SPSA: "ValueError: Tried to convert 'depth' to a tensor and
            # failed. Error: None values not supported."
            cleverhans_model.nb_classes = 10

            # important to shuffle the data since we'll measure standard deviation
            test_loader = torch.utils.data.DataLoader(
                self.test_data, batch_size=self.batch_size, shuffle=True)

            x_test_sample, _ = next(
                iter(test_loader))  # to get the shape of the input
            nchannels, img_rows, img_cols = x_test_sample.shape[1:]
            x = tf.placeholder(tf.float32,
                               shape=(None, nchannels, img_rows, img_cols))
            y = tf.placeholder(tf.int32, shape=(None, ))
            attack_model = self.attack(cleverhans_model, sess=sess)
            clean_preds_op = tf_model_fn(x)
            preds_op = tf_model_fn(x)
            # # to use generate() instead of generate_np()
            # self.params['y'] = y
            # advs = attack_model.generate(x, **self.params)
            # adv_preds_op = tf_model_fn(advs)

            # Run an evaluation of our model against the attack
            self.saved_xs, self.saved_advs, self.saved_ys, \
                self.saved_adv_preds, self.saved_clean_preds = [], [], [], [], []
            accuracies = []
            try:
                for batch_no, (xs, ys) in enumerate(test_loader):
                    # SPSA takes sparse int labels; the other attacks take
                    # one-hot labels (float for MaxConfidence, int otherwise).
                    if self.attack == SPSA:
                        self.params['y'] = ys.numpy().astype(np.int32)
                    else:
                        ys_one_hot = torch.nn.functional.one_hot(ys,
                                                                 10).numpy()
                        if self.attack == MaxConfidence:
                            self.params['y'] = ys_one_hot.astype(np.float32)
                        else:
                            self.params['y'] = ys_one_hot.astype(np.int32)
                    # using generate_np() or generate() leads to similar performance
                    # not sure if the GPU is fully utilized...
                    advs = attack_model.generate_np(xs.numpy(), **self.params)

                    adv_preds = sess.run(preds_op, feed_dict={x: advs})
                    clean_preds = sess.run(preds_op, feed_dict={x: xs})
                    # clean_preds, adv_preds = sess.run([clean_preds_op, adv_preds_op],
                    #                                   feed_dict={x: xs.numpy(), y: ys.numpy()})
                    correct = (np.argmax(adv_preds,
                                         axis=1) == ys.numpy()).sum()
                    # BUG FIX: was `test_loader.batch_size`, which over-states
                    # the denominator on the final (possibly partial) batch.
                    total = len(ys)

                    self.saved_xs.append(xs)
                    self.saved_ys.append(ys)
                    self.saved_advs.append(advs)
                    self.saved_adv_preds.append(adv_preds)
                    self.saved_clean_preds.append(clean_preds)
                    accuracies.append(correct / total)
                    if self.report_interval > 0 and batch_no % self.report_interval == 0:
                        elapsed_sec = time() - start_sec
                        print(
                            '%s: Batch: #%d, accuracy: %.2f, std: %.2f, %.1f secs/batch'
                            % (self.attack_name, batch_no, np.mean(accuracies),
                               np.std(accuracies), elapsed_sec /
                               (batch_no + 1)),
                            file=sys.stderr)

                    if num_batches > 0 and batch_no + 1 >= num_batches: break
            except KeyboardInterrupt:
                # Partial results are still processed and returned.
                print('Evaluation aborted', file=sys.stderr)
            self._process_saved_info()
            print('%s: Accuracy under attack: %.2f (std=%.2f)' %
                  (self.attack_name, np.mean(accuracies), np.std(accuracies)),
                  file=sys.stderr)
            return accuracies
Example #4
0
# Load the ImageNet validation split (already downloaded on local disk).
imagenet_val = datasets.ImageNet("/mnt/data/imagenet", split='val', download=False,
                                     transform=imagenet_transform)

# Pick a fixed 100-image subset (seed 42) and serve it one image at a time.
# NOTE(review): image_partition is defined elsewhere -- presumably returns
# index partitions of the validation set; confirm against its definition.
val_subset_indices = image_partition(42, 100)[0]
val_subset_loader = torch.utils.data.DataLoader(imagenet_val,
                                                batch_size=1,
                                                num_workers=4,
                                                sampler=torch.utils.data.sampler.SubsetRandomSampler(val_subset_indices))

# We use tf for evaluation on adversarial data
sess = tf.Session()
# Channels-first ImageNet input placeholder: (batch, 3, 224, 224).
x_op = tf.placeholder(tf.float32, shape=(None, 3, 224, 224,))
# Convert pytorch model to a tf_model and wrap it in cleverhans
tf_model_fn = convert_pytorch_model_to_tf(model)
cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')
# SPSA requires nb_classes; CallableModelWrapper does not set it itself.
cleverhans_model.nb_classes = 1000

spsa_op = SPSA(cleverhans_model, sess=sess)
# clip_min/clip_max span normalized pixel values; 'y' is filled per image
# inside the loop below.
spsa_params = {'eps': 2.5,
             'clip_min': -2.3,
             'clip_max': 2.8, 
             'nb_iter': 40,
             'y': None}

# Attack each image in the subset and track top-1 accuracy.
correct = 0
count = 0
for xs, ys in val_subset_loader:
    count += 1
    ys = ys.numpy().astype(np.int32)
    # Create an SPSA attack
    spsa_params['y'] = ys