Example #1
def compare_attacks(key, item):
    AdvertorchAttack = key
    fmodel = foolbox.models.PyTorchModel(model,
                                         bounds=(0, 1),
                                         num_classes=NUM_CLASS)
    fb_adversary = item["fb_class"](fmodel)
    fb_kwargs = merge2dicts(item["kwargs"], item["fb_kwargs"])
    at_kwargs = merge2dicts(item["kwargs"], item["at_kwargs"])
    thresholds = item["thresholds"]
    at_adversary = AdvertorchAttack(model, **at_kwargs)
    x_at = at_adversary.perturb(img_batch, label_batch)
    y_logits = model(img_batch)
    y_at_logits = model(x_at)
    y_pred = predict_from_logits(y_logits)
    y_at_pred = predict_from_logits(y_at_logits)

    fb_succeeded_once = False
    for i, (x_i, y_i) in enumerate(zip(img_batch, label_batch)):
        # skip samples where the clean prediction is already wrong or the
        # advertorch attack did not succeed (foolbox failures are checked
        # after the loop)
        if y_i != y_pred[i]:
            continue
        if y_i == y_at_pred[i]:
            continue
        np.random.seed(233333)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            x_fb = fb_adversary(x_i.cpu().numpy(), label=int(y_i), **fb_kwargs)
        if x_fb is not None:
            compare_at_fb(x_at[i].cpu().numpy(), x_fb, **thresholds)
            fb_succeeded_once = True

    if not fb_succeeded_once:
        raise RuntimeError(
            "Foolbox never succeeded; adjust the testing parameters!")
Example #2
def multiple_mini_batch_attack(adversary, loader, device="cuda", norm=None):
    lst_label = []
    lst_pred = []
    lst_advpred = []
    lst_dist = []

    if norm == "inf":

        def dist_func(x, y):
            # the Linf distance takes the absolute difference before the max
            return (x - y).view(x.size(0), -1).abs().max(dim=1)[0]
    elif norm == 1 or norm == 2:
        from advertorch.utils import _get_norm_batch

        def dist_func(x, y):
            return _get_norm_batch(x - y, norm)
    else:
        assert norm is None

    for data, label in loader:
        data, label = data.to(device), label.to(device)
        adv = adversary.perturb(data, label)
        advpred = predict_from_logits(adversary.predict(adv))
        pred = predict_from_logits(adversary.predict(data))
        lst_label.append(label)
        lst_pred.append(pred)
        lst_advpred.append(advpred)
        if norm is not None:
            lst_dist.append(dist_func(data, adv))

    return torch.cat(lst_label), torch.cat(lst_pred), torch.cat(lst_advpred), \
        torch.cat(lst_dist) if norm is not None else None
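A minimal usage sketch, assuming a trained model and a test_loader (both hypothetical names here) and the LinfPGDAttack constructor shown in the later examples:

from advertorch.attacks import LinfPGDAttack
from torch import nn

adversary = LinfPGDAttack(
    model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.3,
    nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
    targeted=False)
label, pred, advpred, dist = multiple_mini_batch_attack(
    adversary, test_loader, device="cuda", norm="inf")
# robust accuracy: fraction of samples still classified correctly under attack
robust_acc = (advpred == label).float().mean().item()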
Example #3
    def get_metric_eval(self):

        utr_score = []
        tr_score = []
        for i in range(1):

            # TODO: Customise input parameters to methods like LinfPGDAttack
            adversary = LinfPGDAttack(
                self.phi,
                loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=0.10,
                nb_iter=40,
                eps_iter=0.01,
                rand_init=True,
                clip_min=0.0,
                clip_max=1.0,
                targeted=False)

            # x_e, y_e: evaluation inputs and labels (defined outside this snippet)
            adv_untargeted = adversary.perturb(x_e, y_e)

            target = torch.ones_like(y_e) * 3
            adversary.targeted = True
            adv_targeted = adversary.perturb(x_e, target)

            pred_cln = predict_from_logits(self.phi(x_e))
            pred_untargeted_adv = predict_from_logits(self.phi(adv_untargeted))
            pred_targeted_adv = predict_from_logits(self.phi(adv_targeted))
            utr_score.append(torch.sum(pred_cln != pred_untargeted_adv).item())
            tr_score.append(torch.sum(pred_cln != pred_targeted_adv).item())

            batch_size = 5
            plt.figure(figsize=(10, 8))
            for ii in range(batch_size):
                plt.subplot(3, batch_size, ii + 1)
                _imshow(x_e[ii])
                plt.title("clean \n pred: {}".format(pred_cln[ii]))
                plt.subplot(3, batch_size, ii + 1 + batch_size)
                _imshow(adv_untargeted[ii])
                plt.title("untargeted \n adv \n pred: {}".format(
                    pred_untargeted_adv[ii]))
                plt.subplot(3, batch_size, ii + 1 + batch_size * 2)
                _imshow(adv_targeted[ii])
                plt.title("targeted to 3 \n adv \n pred: {}".format(
                    pred_targeted_adv[ii]))

            plt.tight_layout()
            plt.savefig(self.save_path + '.png')

        utr_score = np.array(utr_score)
        tr_score = np.array(tr_score)
        print('Misclassification on Untargeted Attack', np.mean(utr_score),
              np.std(utr_score))
        print('Misclassification on Targeted Attack', np.mean(tr_score),
              np.std(tr_score))

        self.metric_score['Untargeted Method'] = np.mean(utr_score)
        self.metric_score['Targeted Method'] = np.mean(tr_score)

        return
Example #4
def multiple_mini_batch_attack(adversary,
                               loader,
                               device="cuda",
                               save_adv=False,
                               norm=None,
                               num_batch=None):
    lst_label = []
    lst_pred = []
    lst_advpred = []
    lst_dist = []

    _norm_convert_dict = {"Linf": "inf", "L2": 2, "L1": 1}
    if norm in _norm_convert_dict:
        norm = _norm_convert_dict[norm]

    if norm == "inf":

    def dist_func(x, y):
            # the Linf distance takes the absolute difference before the max
            return (x - y).view(x.size(0), -1).abs().max(dim=1)[0]

    elif norm == 1 or norm == 2:
        from advertorch.utils import _get_norm_batch

        def dist_func(x, y):
            return _get_norm_batch(x - y, norm)

    else:
        assert norm is None

    idx_batch = 0

    for data, label in loader:
        data, label = data.to(device), label.to(device)
        adv = adversary.perturb(data, label)
        advpred = predict_from_logits(adversary.predict(adv))
        pred = predict_from_logits(adversary.predict(data))
        lst_label.append(label)
        lst_pred.append(pred)
        lst_advpred.append(advpred)
        if norm is not None:
            lst_dist.append(dist_func(data, adv))

        idx_batch += 1
        if idx_batch == num_batch:
            break

    return (
        torch.cat(lst_label),
        torch.cat(lst_pred),
        torch.cat(lst_advpred),
        torch.cat(lst_dist) if norm is not None else None,
    )
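This variant additionally accepts the norm names "Linf", "L2", and "L1" and a num_batch cap on how many mini-batches to evaluate. A short usage sketch, reusing the hypothetical adversary and test_loader from the note after Example #2:

# evaluate only the first 10 mini-batches, reporting Linf distortions
label, pred, advpred, dist = multiple_mini_batch_attack(
    adversary, test_loader, device="cuda", norm="Linf", num_batch=10)
print("mean Linf distortion:", dist.mean().item())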
Example #5
def attack_one_model(model, nb_iter, eps_iter):
    # generate attack samples
    batch_size = 100
    # dataset
    root = '../data'
    if not os.path.exists(root):
        os.mkdir(root)
    test_set = dset.MNIST(root=root, train=False, transform=transforms.ToTensor(), download=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)

    for cln_data, true_labels in test_loader:
        break
    cln_data, true_labels = cln_data.to(device), true_labels.to(device)

    # model_2 is an optional second model assumed to be defined at module level
    if model_2 is not None:
        adv_targeted_results, adv_target_labels, adv_untargeted = generate_attack_samples(
            model_2, cln_data, true_labels, nb_iter, eps_iter)
    else:
        adv_targeted_results, adv_target_labels, adv_untargeted = generate_attack_samples(
            model, cln_data, true_labels, nb_iter, eps_iter)

    defense_cln_acc = 0.0
    defense_acc = 0.0
    defense_rate = 0.0
    attack_rate = 0.0

    pred_cln = predict_from_logits(model(cln_data))

    for targeted_label in range(len(adv_targeted_results)):
        # make sure the label index equals the adv target label
        assert targeted_label == adv_target_labels[targeted_label][0]
        for pred_label, adv_result, true_label in zip(pred_cln, adv_targeted_results[targeted_label], true_labels):
            if true_label == targeted_label:
                continue
            pred_targeted_adv = predict_from_logits(model(adv_result.unsqueeze(0)))
            if pred_label == true_label:
                defense_cln_acc += 1
            if pred_targeted_adv == true_label:
                defense_acc += 1
            if pred_label == pred_targeted_adv:
                defense_rate += 1
            if pred_targeted_adv == targeted_label:
                attack_rate += 1

    # batch of 100 samples x 10 MNIST target classes, minus the 100 skipped
    # cases where true_label == targeted_label, leaves 900 comparisons
    defense_cln_acc /= 900
    defense_acc /= 900
    defense_rate /= 900
    attack_rate /= 900

    return defense_cln_acc, defense_acc, defense_rate, attack_rate
Example #6
def multiple_mini_batch_attack(adversary, loader, device="cuda"):
    lst_label = []
    lst_pred = []
    lst_advpred = []

    for data, label in loader:
        data, label = data.to(device), label.to(device)
        adv = adversary.perturb(data, label)
        advpred = predict_from_logits(adversary.predict(adv))
        pred = predict_from_logits(adversary.predict(data))
        lst_label.append(label)
        lst_pred.append(pred)
        lst_advpred.append(advpred)

    return torch.cat(lst_label), torch.cat(lst_pred), torch.cat(lst_advpred)
Example #7
    def predict_then_update_loss_acc_meter(self, meter, data, target):
        with torch.no_grad(), ctx_eval(self.model):
            output = self.model(data)
        acc = get_accuracy(predict_from_logits(output), target)
        loss = self.loss_fn(output, target).item()
        update_loss_acc_meter(meter, loss, acc, len(data))
        return loss, acc
Example #8
def attack_whole_dataset(adversary, loader, device="cuda"):
    lst_adv = []
    lst_label = []
    lst_pred = []
    lst_advpred = []
    for data, label in loader:
        data, label = data.to(device), label.to(device)
        pred = predict_from_logits(adversary.predict(data))
        adv = adversary.perturb(data, label)
        advpred = predict_from_logits(adversary.predict(adv))
        lst_label.append(label)
        lst_pred.append(pred)
        lst_advpred.append(advpred)
        lst_adv.append(adv)
    return torch.cat(lst_adv), torch.cat(lst_label), torch.cat(lst_pred), \
        torch.cat(lst_advpred)
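Unlike multiple_mini_batch_attack, this function also returns the adversarial examples themselves. A small sketch of how the outputs might be used, with adversary and test_loader assumed as in the earlier notes:

adv, label, pred, advpred = attack_whole_dataset(adversary, test_loader)
# attack success rate, counted only over initially correct predictions
correct = pred == label
success = ((advpred != label) & correct).float().sum() / correct.float().sum()
print("attack success rate:", success.item())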
Example #9
    def train_one_epoch(self):
        _bgn_epoch = time.time()
        if self.verbose:
            print("Training epoch {}".format(self.epochs))
        self.model.train()
        self.model.to(self.device)
        self.reset_epoch_meters()
        self.reset_disp_meters()

        _train_time = 0.

        for batch_idx, (data, idx) in enumerate(self.loader):
            data, idx = data.to(self.device), idx.to(self.device)
            target = self.loader.targets[idx]

            _bgn_train = time.time()
            clnoutput, clnloss, eps = self.train_one_batch(data, idx, target)
            _train_time = _train_time + (time.time() - _bgn_train)

            clnacc = get_accuracy(predict_from_logits(clnoutput), target)
            update_loss_acc_meter(self.cln_meter, clnloss.item(), clnacc,
                                  len(data))
            update_eps_meter(self.eps_meter, eps.mean().item(), len(data))

            if self.disp_interval is not None and \
                    batch_idx % self.disp_interval == 0:
                self.print_disp_meters(batch_idx)
                self.reset_disp_meters()

            if self.steps == self.max_steps:
                self.stop_training()
                break

        self.print_disp_meters(batch_idx)
        self.disp_eps_hist()
        self.epochs += 1
        self._adjust_lr_by_epochs()

        print("total epoch time", time.time() - _bgn_epoch)
        print("training total time", _train_time)
Example #10
    idx += 1

avg_distortion_rate /= 4  # 4 x 250 = 1000

adv_targeted_results = [torch.cat(result) for result in adv_targeted_results]
adv_target_labels = [torch.cat(label) for label in adv_target_labels]

defense_cln_acc = 0.0
defense_acc = 0.0
defense_rate = 0.0
attack_rate = 0.0

cln_data = torch.cat(cln_data, 0)  # 100, 1, 28, 28
true_labels = torch.cat(true_labels, 0)  # 100

pred_cln = predict_from_logits(model(cln_data.to(device)))

with torch.no_grad():
    loader = tqdm(range(len(adv_targeted_results)),
                  total=len(adv_targeted_results))
    for targeted_label in loader:
        # make sure the label index equals the adv target label
        assert targeted_label == adv_target_labels[targeted_label][0]
        for pred_label, adv_result, true_label in zip(
                pred_cln, adv_targeted_results[targeted_label], true_labels):
            if true_label == targeted_label:
                continue
            pred_targeted_adv = predict_from_logits(
                model(adv_result.unsqueeze(0)))
            if pred_label == true_label:
                defense_cln_acc += 1
Example #11
adversary = LinfPGDAttack(
    model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.15,
    nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0.0, clip_max=1.0,
    targeted=False)

# generate untargeted adversarial samples
adv_untargeted = adversary.perturb(cln_data, true_label)

# generate targeted adversarial samples
target = torch.ones_like(true_label) * 3
adversary.targeted = True
adv_targeted = adversary.perturb(cln_data, target)

# Test the model on these samples
pred_cln = predict_from_logits(model(cln_data))
pred_untargeted_adv = predict_from_logits(model(adv_untargeted))
pred_targeted_adv = predict_from_logits(model(adv_targeted))

# Show the results
# Model performance on clean, untargeted and targeted images
# ----------------------------------------------------------
plt.figure(figsize=(10, 8))
for ii in range(batch_size):
    plt.subplot(3, batch_size, ii + 1)
    _imshow(cln_data[ii])
    plt.title("clean \n pred: {}".format(emotion_lst[pred_cln[ii]]))
    plt.subplot(3, batch_size, ii + 1 + batch_size)
    _imshow(adv_untargeted[ii])
    plt.title("untargeted \n adv \n pred: {}".format(
        emotion_lst[pred_untargeted_adv[ii]]))
Example #12
def w_dist(x, y):
    return (wass_model(x) - wass_model(y)).detach().cpu().numpy().mean()


loader = get_mnist_test_loader(batch_size=args.batch_size, shuffle=True)
clns = []
advs = []
trues = []
pred_clns = []
pred_advs = []
iter_counts = []
for batch_idx, (cln_data, true_label) in enumerate(loader):
    cln_data, true_label = cln_data.to(device), true_label.to(device)
    adv_untargeted, iter_count = adversary.perturb(cln_data, true_label)
    iter_counts.append(iter_count)
    pred_cln = predict_from_logits(model(cln_data))
    pred_untargeted_adv = predict_from_logits(model(adv_untargeted))
    # move tensors to the CPU before converting to numpy
    clns.extend(cln_data.cpu().numpy())
    advs.extend(adv_untargeted.cpu().numpy())
    trues.extend(true_label.cpu().numpy())
    pred_clns.extend(pred_cln.cpu().numpy())
    pred_advs.extend(pred_untargeted_adv.cpu().numpy())
    if batch_idx == args.iters:
        break

os.makedirs('adv_data', exist_ok=True)  # create the output dir (assumes os is imported)
np.save('adv_data/clns.npy', clns)
np.save('adv_data/advs.npy', advs)
np.save('adv_data/true_labels.npy', trues)
np.save('adv_data/pred_clns.npy', pred_clns)
np.save('adv_data/pred_advs.npy', pred_advs)
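The saved arrays can later be reloaded for offline analysis; for instance (paths as in the snippet above):

import numpy as np

clns = np.load('adv_data/clns.npy')
advs = np.load('adv_data/advs.npy')
# per-sample Linf distortion of the untargeted adversarial examples
linf = np.abs(advs - clns).reshape(len(advs), -1).max(axis=1)
print("mean Linf distortion:", linf.mean())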
Example #13
    def get_metric_eval(self):

        utr_score = []
        tr_score = []
        for i in range(1):

            # TODO: Customise input parameters to methods like LinfPGDAttack
            adversary = LinfPGDAttack(
                self.phi,
                loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=self.args.adv_eps,
                nb_iter=70,
                eps_iter=0.01,
                rand_init=True,
                clip_min=(0.0 - 0.1307) / 0.3081,
                clip_max=(1.0 - 0.1307) / 0.3081,
                targeted=False)

            pred_cln = []
            pred_untargeted_adv = []
            pred_targeted_adv = []
            temp_counter = 0
            for batch_idx, (x_e, y_e, d_e, idx_e) in enumerate(
                    self.test_dataset):
                x_e = x_e.to(self.cuda)
                print(torch.min(x_e), torch.max(x_e))
                y_e = torch.argmax(y_e, dim=1).to(self.cuda)

                adversary.targeted = False
                adv_untargeted = adversary.perturb(x_e, y_e)

                target = torch.ones_like(y_e) * 3
                adversary.targeted = True
                adv_targeted = adversary.perturb(x_e, target)
                print(torch.min(adv_untargeted), torch.max(adv_untargeted))
                pred_cln.append(predict_from_logits(self.phi(x_e)))
                pred_untargeted_adv.append(
                    predict_from_logits(self.phi(adv_untargeted)))
                pred_targeted_adv.append(
                    predict_from_logits(self.phi(adv_targeted)))

                temp_counter += 1
                if temp_counter == 5:
                    break

            pred_cln = torch.cat(pred_cln)
            pred_untargeted_adv = torch.cat(pred_untargeted_adv)
            pred_targeted_adv = torch.cat(pred_targeted_adv)
            utr_score.append(
                torch.sum(pred_cln != pred_untargeted_adv)
                .detach().cpu().numpy() / pred_cln.shape[0])
            tr_score.append(
                torch.sum(pred_cln != pred_targeted_adv)
                .detach().cpu().numpy() / pred_cln.shape[0])

#             batch_size=5
#             plt.figure(figsize=(10, 8))
#             for ii in range(batch_size):
#                 plt.subplot(3, batch_size, ii + 1)
#                 _imshow(x_e[ii])
#                 plt.title("clean \n pred: {}".format(pred_cln[ii]))
#                 plt.subplot(3, batch_size, ii + 1 + batch_size)
#                 _imshow(adv_untargeted[ii])
#                 plt.title("untargeted \n adv \n pred: {}".format(
#                     pred_untargeted_adv[ii]))
#                 plt.subplot(3, batch_size, ii + 1 + batch_size * 2)
#                 _imshow(adv_targeted[ii])
#                 plt.title("targeted to 3 \n adv \n pred: {}".format(
#                     pred_targeted_adv[ii]))

#             plt.tight_layout()
#             plt.savefig( self.save_path + '.png' )


        utr_score = np.array(utr_score)
        tr_score = np.array(tr_score)
        print('Misclassification on Untargeted Attack', np.mean(utr_score),
              np.std(utr_score), self.args.adv_eps)
        print('Misclassification on Targeted Attack', np.mean(tr_score),
              np.std(tr_score), self.args.adv_eps)

        self.metric_score['Untargeted Method'] = np.mean(utr_score)
        self.metric_score['Targeted Method'] = np.mean(tr_score)

        return