Example #1
    def get_metric_eval(self):
        """Evaluate robustness to untargeted and targeted L-inf PGD attacks."""

        utr_score = []
        tr_score = []
        for i in range(1):

            # TODO: customise input parameters to methods like LinfPGDAttack
            adversary = LinfPGDAttack(
                self.phi,
                loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=0.10,
                nb_iter=40,
                eps_iter=0.01,
                rand_init=True,
                clip_min=0.0,
                clip_max=1.0,
                targeted=False)

            # x_e, y_e: evaluation batch of inputs and labels, set up elsewhere in the class.
            adv_untargeted = adversary.perturb(x_e, y_e)

            # Targeted attack: push every sample towards class 3.
            target = torch.ones_like(y_e) * 3
            adversary.targeted = True
            adv_targeted = adversary.perturb(x_e, target)

            pred_cln = predict_from_logits(self.phi(x_e))
            pred_untargeted_adv = predict_from_logits(self.phi(adv_untargeted))
            pred_targeted_adv = predict_from_logits(self.phi(adv_targeted))
            utr_score.append(torch.sum(pred_cln != pred_untargeted_adv).item())
            tr_score.append(torch.sum(pred_cln != pred_targeted_adv).item())

            batch_size = 5
            plt.figure(figsize=(10, 8))
            for ii in range(batch_size):
                plt.subplot(3, batch_size, ii + 1)
                _imshow(x_e[ii])
                plt.title("clean \n pred: {}".format(pred_cln[ii]))
                plt.subplot(3, batch_size, ii + 1 + batch_size)
                _imshow(adv_untargeted[ii])
                plt.title("untargeted \n adv \n pred: {}".format(
                    pred_untargeted_adv[ii]))
                plt.subplot(3, batch_size, ii + 1 + batch_size * 2)
                _imshow(adv_targeted[ii])
                plt.title("targeted to 3 \n adv \n pred: {}".format(
                    pred_targeted_adv[ii]))

            plt.tight_layout()
            plt.savefig(self.save_path + '.png')

        utr_score = np.array(utr_score)
        tr_score = np.array(tr_score)
        print('Misclassification on Untargeted Attack ', np.mean(utr_score),
              np.std(utr_score))
        print('Misclassification on Targeted Attack', np.mean(tr_score),
              np.std(tr_score))

        self.metric_score['Untargeted Method'] = np.mean(utr_score)
        self.metric_score['Targeted Method'] = np.mean(tr_score)

        return
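The snippets on this page presuppose a few imports that the excerpts omit. A minimal sketch of what Example #1 needs, assuming advertorch is installed (the _imshow helper ships with advertorch's bundled examples package, which may need a separate install):

# Imports assumed by the snippets on this page; advertorch provides the
# attack class and the predict_from_logits helper, matplotlib the plotting.
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

from advertorch.attacks import LinfPGDAttack
from advertorch.utils import predict_from_logits
from advertorch_examples.utils import _imshow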
Example #2
def generate_attack_samples(model, cln_data, true_label, nb_iter, eps_iter):
    """Generate untargeted and per-class targeted L-inf PGD samples for a batch."""
    adversary = LinfPGDAttack(
        model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), eps=0.25,
        nb_iter=nb_iter, eps_iter=eps_iter, rand_init=True, clip_min=0.0,
        clip_max=1.0, targeted=False)

    adv_untargeted = adversary.perturb(cln_data, true_label)

    adv_targeted_results = []
    adv_target_labels = []
    for target_label in range(0, 10):
        # Labels for a 10-class problem lie in 0..9.
        assert isinstance(target_label, int) and 0 <= target_label < 10
        target = torch.ones_like(true_label) * target_label
        adversary.targeted = True
        adv_targeted = adversary.perturb(cln_data, target)
        adv_targeted_results.append(adv_targeted)
        adv_target_labels.append(target)

    return adv_targeted_results, adv_target_labels, adv_untargeted
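A hypothetical usage sketch for the function above; model stands in for any trained 10-class classifier (an nn.Module) with inputs in [0, 1], and cln_data, true_label for one clean evaluation batch. None of these names are defined on this page:

# Hypothetical names: `model`, `cln_data`, `true_label` are placeholders
# for a trained classifier and one clean batch from a data loader.
adv_per_class, target_labels, adv_untargeted = generate_attack_samples(
    model, cln_data, true_label, nb_iter=40, eps_iter=0.01)
# adv_per_class[k] is the batch perturbed towards class k, for k = 0..9.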
Example #3
    def get_metric_eval(self):
        """Evaluate robustness to untargeted and targeted L-inf PGD attacks."""

        utr_score = []
        tr_score = []
        for i in range(1):

            # TODO: customise input parameters to methods like LinfPGDAttack
            adversary = LinfPGDAttack(
                self.phi,
                loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                eps=self.args.adv_eps,
                nb_iter=70,
                eps_iter=0.01,
                rand_init=True,
                # Inputs are normalised with MNIST mean/std, so the raw pixel
                # range [0, 1] maps to these bounds (see the note at the end).
                clip_min=(0.0 - 0.1307) / 0.3081,
                clip_max=(1.0 - 0.1307) / 0.3081,
                targeted=False)

            pred_cln = []
            pred_untargeted_adv = []
            pred_targeted_adv = []
            temp_counter = 0
            for batch_idx, (x_e, y_e, d_e, idx_e) in enumerate(self.test_dataset):
                x_e = x_e.to(self.cuda)
                print(torch.min(x_e), torch.max(x_e))
                y_e = torch.argmax(y_e, dim=1).to(self.cuda)

                adversary.targeted = False
                adv_untargeted = adversary.perturb(x_e, y_e)

                # Targeted attack: push every sample towards class 3.
                target = torch.ones_like(y_e) * 3
                adversary.targeted = True
                adv_targeted = adversary.perturb(x_e, target)
                print(torch.min(adv_untargeted), torch.max(adv_untargeted))
                pred_cln.append(predict_from_logits(self.phi(x_e)))
                pred_untargeted_adv.append(predict_from_logits(self.phi(adv_untargeted)))
                pred_targeted_adv.append(predict_from_logits(self.phi(adv_targeted)))

                # Evaluate on the first five batches only.
                temp_counter += 1
                if temp_counter == 5:
                    break

            pred_cln = torch.cat(pred_cln)
            pred_untargeted_adv = torch.cat(pred_untargeted_adv)
            pred_targeted_adv = torch.cat(pred_targeted_adv)
            utr_score.append(torch.sum(pred_cln != pred_untargeted_adv).detach().cpu().numpy() / pred_cln.shape[0])
            tr_score.append(torch.sum(pred_cln != pred_targeted_adv).detach().cpu().numpy() / pred_cln.shape[0])

            # (Plotting of clean vs. adversarial samples omitted here; see
            # Example #1 for the identical visualisation code.)

        utr_score = np.array(utr_score)
        tr_score = np.array(tr_score)
        print('Misclassification on Untargeted Attack ', np.mean(utr_score),
              np.std(utr_score), self.args.adv_eps)
        print('Misclassification on Targeted Attack', np.mean(tr_score),
              np.std(tr_score), self.args.adv_eps)

        self.metric_score['Untargeted Method'] = np.mean(utr_score)
        self.metric_score['Targeted Method'] = np.mean(tr_score)

        return
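A note on the clip bounds in Example #3: the inputs there are normalised with the usual MNIST statistics (mean 0.1307, std 0.3081), so clipping must happen in normalised space rather than at [0, 1]. The expressions in the attack constructor map the raw pixel range [0, 1] through that same normalisation:

# The clip bounds undo MNIST-style normalisation, so that clipping in
# normalised space corresponds to the raw pixel range [0, 1].
MNIST_MEAN, MNIST_STD = 0.1307, 0.3081
clip_min = (0.0 - MNIST_MEAN) / MNIST_STD   # ~ -0.4242
clip_max = (1.0 - MNIST_MEAN) / MNIST_STD   # ~  2.8215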