Ejemplo n.º 1
0
    def i_fgsm(self,
               x,
               y,
               targeted=False,
               eps=0.03,
               alpha=1,
               iteration=1,
               x_val_min=-1,
               x_val_max=1):
        """Iterative FGSM (basic iterative method) attack.

        Repeatedly takes a signed-gradient step on the loss, projecting the
        result back into the eps-ball around ``x`` and into the valid value
        range after every step.

        Returns (x_adv, h_adv, h): the adversarial input, the network output
        on it, and the network output on the clean input.
        """
        x_adv = Variable(x.data, requires_grad=True)
        for _ in range(iteration):
            logits_adv = self.net(x_adv)
            # Targeted: minimize loss toward the target label y.
            # Untargeted: negate so the descent step ascends the true loss.
            if targeted:
                loss = self.criterion(logits_adv, y)
            else:
                loss = -self.criterion(logits_adv, y)

            self.net.zero_grad()
            if x_adv.grad is not None:
                x_adv.grad.data.fill_(0)
            loss.backward()

            # One signed-gradient step, then clip into [x-eps, x+eps]
            # (elementwise min/max) and the valid pixel range.
            x_adv = x_adv - alpha * x_adv.grad.sign()
            x_adv = torch.min(x_adv, x + eps)
            x_adv = torch.max(x_adv, x - eps)
            x_adv = torch.clamp(x_adv, x_val_min, x_val_max)
            # Re-wrap as a fresh leaf so the next backward() stops here
            # instead of unrolling through earlier iterations.
            x_adv = Variable(x_adv.data, requires_grad=True)

        h = self.net(x)
        h_adv = self.net(x_adv)

        return x_adv, h_adv, h
Ejemplo n.º 2
0
def test(model,
         test_dataloader,
         cost,
         print_freq=40,
         batch_num=None,
         denoise=None,
         random_layer=None):
    """Evaluate ``model`` on adversarial examples from ``test_dataloader``.

    Each batch yields (images, adversarial_images, labels, diffs); only the
    adversarial images are classified, optionally after passing through the
    ``denoise`` and/or ``random_layer`` transforms.

    Args:
        model: classifier to evaluate (switched to eval mode here).
        test_dataloader: yields (images, adversarial_images, labels, diffs).
        cost: loss function taking (outputs, labels).
        print_freq: log a progress line every ``print_freq`` batches.
        batch_num: total batch count for the progress line; defaults to
            ``len(test_dataloader)``. Previously a ``None`` default crashed
            the ``'%d' % batch_num`` formatting on the first batch.
        denoise: optional denoiser applied to the adversarial images.
        random_layer: optional randomization layer applied afterwards.

    Returns:
        (avg batch time, avg loss, avg error rate, avg meanD), where meanD
        averages ``diffs`` over all samples, counting misclassified ones as 0.
    """
    if batch_num is None:
        batch_num = len(test_dataloader)

    batch_time = AverageMeter()
    losses = AverageMeter()
    error = AverageMeter()
    meanD = AverageMeter()

    end = time.time()
    model.eval()
    # Pure evaluation: disable autograd bookkeeping.
    with torch.no_grad():
        for i, (images, adversarial_images, labels,
                diffs) in enumerate(test_dataloader):
            adversarial_images, labels = adversarial_images.cuda(), labels.cuda()
            if denoise:
                adversarial_images = denoise(adversarial_images)
            if random_layer:
                adversarial_images = random_layer(adversarial_images)

            outputs = model(adversarial_images)
            loss = cost(outputs, labels)

            batch_size = labels.size(0)
            outputs = outputs.max(1)[1]
            error.update(
                torch.ne(outputs.cpu(), labels.cpu()).float().sum().item() /
                batch_size, batch_size)
            losses.update(loss.item(), batch_size)
            # diffs contribute to the mean only for correctly classified
            # samples; wrong predictions contribute 0.
            meanD.update(
                where((outputs == labels).cpu(), diffs.float(),
                      0.).mean().item(), batch_size)
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                res = '\t'.join([
                    'test',
                    'Iter: [%d/%d]' % (i + 1, batch_num),
                    'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                    'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                    'Error %.4f (%.4f)' % (error.val, error.avg),
                    'meanD %.4f (%.4f)' % (meanD.val, meanD.avg)
                ])
                print(res)
    return batch_time.avg, losses.avg, error.avg, meanD.avg
Ejemplo n.º 3
0
    def FGSM(self,
             x,
             y_true,
             y_target=None,
             eps=0.03,
             alpha=2 / 255,
             iteration=1):
        """Attack a batch with (iterative) FGSM and build a visualization.

        Args:
            x: input images (torch tensor or numpy array).
            y_true: true labels (torch tensor or numpy array).
            y_target: when given, run a targeted attack toward these labels.
            eps: maximum perturbation magnitude.
            alpha: per-step size for the iterative variant.
            iteration: 1 selects plain FGSM, >1 the iterative variant.

        Returns:
            (x_adv, changed, (accuracy, cost, accuracy_adv, cost_adv)), where
            ``changed`` is a color-framed image batch marking samples whose
            prediction flipped. NOTE(review): the frame assumes 3x28x28
            output (MNIST-sized inputs) — confirm before using on other data.
        """
        self.set_mode('eval')
        # Accept numpy inputs for convenience (isinstance instead of the
        # previous exact type() comparison).
        if isinstance(x, np.ndarray):
            x = torch.from_numpy(x)
        if isinstance(y_true, np.ndarray):
            y_true = torch.from_numpy(y_true)
        x = Variable(x, requires_grad=True).to(self.device)
        y_true = Variable(y_true, requires_grad=False).to(self.device)

        if y_target is not None:
            targeted = True
            y_target = Variable(y_target, requires_grad=False).to(self.device)
        else:
            targeted = False

        # Clean-input statistics.
        h = self.net(x)
        prediction = h.max(1)[1]
        accuracy = torch.eq(prediction, y_true).float().mean()
        cost = F.cross_entropy(h, y_true)

        if iteration == 1:
            if targeted:
                x_adv, h_adv, h = self.attack.fgsm(x, y_target, True, eps)
            else:
                x_adv, h_adv, h = self.attack.fgsm(x, y_true, False, eps)
        else:
            if targeted:
                x_adv, h_adv, h = self.attack.i_fgsm(x, y_target, True, eps,
                                                     alpha, iteration)
            else:
                x_adv, h_adv, h = self.attack.i_fgsm(x, y_true, False, eps,
                                                     alpha, iteration)

        # Adversarial-input statistics.
        prediction_adv = h_adv.max(1)[1]
        accuracy_adv = torch.eq(prediction_adv, y_true).float().mean()
        cost_adv = F.cross_entropy(h_adv, y_true)

        # make indication of perturbed images that changed predictions of
        # the classifier (targeted: reached the target label; untargeted:
        # prediction differs from the clean prediction).
        if targeted:
            changed = torch.eq(y_target, prediction_adv)
        else:
            changed = torch.eq(prediction, prediction_adv)
            changed = torch.eq(changed, 0)
        changed = changed.float().view(-1, 1, 1, 1).repeat(1, 3, 28, 28)

        # Color the whole image: RGB (252, 39, 25) where the prediction
        # changed, (91, 252, 25) where it did not.
        changed[:, 0, :, :] = where(changed[:, 0, :, :] == 1, 252, 91)
        changed[:, 1, :, :] = where(changed[:, 1, :, :] == 1, 39, 252)
        changed[:, 2, :, :] = where(changed[:, 2, :, :] == 1, 25, 25)
        changed = self.scale(changed / 255)
        # Paste the adversarial image inside the colored frame.
        changed[:, :, 3:-2, 3:-2] = x_adv.repeat(1, 3, 1, 1)[:, :, 3:-2, 3:-2]

        self.set_mode('train')

        # .item() replaces the legacy .data[0], which raises on 0-dim
        # tensors in PyTorch >= 0.4 and is inconsistent with the .item()
        # calls used elsewhere in this file.
        return x_adv.data, changed.data, \
            (accuracy.item(), cost.item(), accuracy_adv.item(),
             cost_adv.item())
Ejemplo n.º 4
0
def kl_y_to_p(logits, y):
    """KL divergence KL(y || p), with p derived from ``logits``.

    Computed as sum(y*log y) - sum(y*log p) along the last axis. Entries
    with y == 0 go through the two-argument ``utils.where`` — presumably a
    mask that zeroes them out; its exact semantics live in utils (verify).
    """
    probs = utils.logits_to_probs(logits)
    y_log_y = torch.sum(utils.where(y > 0, y * torch.log(y)), dim=-1)
    y_log_p = torch.sum(utils.where(y > 0, y * torch.log(probs)), dim=-1)
    return y_log_y - y_log_p
Ejemplo n.º 5
0
def entropy(probs):
    """Shannon entropy along the last axis: -sum(p * log p).

    Zero-probability entries are routed through the two-argument
    ``utils.where`` — presumably masking p*log(p) where p == 0; exact
    semantics live in utils (verify).
    """
    p_log_p = utils.where(probs > 0, probs * torch.log(probs))
    return -torch.sum(p_log_p, dim=-1)
Ejemplo n.º 6
0
def train_epoch(model_denoise,
                model_classify,
                train_dataloader,
                optimizer,
                cost,
                epoch,
                n_epochs,
                print_freq=40,
                batch_num=None,
                random_layer=None):
    """Train the denoiser for one epoch against a frozen classifier.

    For each batch the adversarial images are denoised, both the clean and
    the denoised images are classified, and ``cost`` compares the two logit
    sets. The clean logits are detached so gradients flow only through the
    denoiser.

    Args:
        model_denoise: denoiser being trained (switched to train mode).
        model_classify: frozen classifier (switched to eval mode).
        train_dataloader: yields (clean_images, adversarial_images, labels, diffs).
        optimizer: optimizer over the denoiser's parameters.
        cost: loss comparing (clean_logits, denoised_logits).
        epoch, n_epochs: current / total epoch numbers for logging.
        print_freq: log a progress line every ``print_freq`` batches.
        batch_num: total batch count for the log line; defaults to
            ``len(train_dataloader)``. Previously a ``None`` default crashed
            the ``'%d' % batch_num`` formatting on the first batch.
        random_layer: optional randomization applied jointly to both
            image sets.

    Returns:
        (avg batch time, avg loss, clean error, adversarial error, meanD).
    """
    if batch_num is None:
        batch_num = len(train_dataloader)

    batch_time = AverageMeter()
    losses = AverageMeter()
    error1 = AverageMeter()  # clean-image classification error
    error2 = AverageMeter()  # denoised-adversarial classification error
    meanD = AverageMeter()   # mean diff over correctly classified samples

    end = time.time()
    model_denoise.train()
    model_classify.eval()
    for i, (clean_images, adversarial_images, labels,
            diffs) in enumerate(train_dataloader):
        clean_images, adversarial_images, labels = (
            clean_images.cuda(), adversarial_images.cuda(), labels.cuda())

        optimizer.zero_grad()
        d_adversarial_images = model_denoise(adversarial_images)

        # NOTE: here the random layer takes both image sets in one call
        # (valid_epoch applies it to each set separately).
        if random_layer:
            clean_images, d_adversarial_images = random_layer(
                clean_images, d_adversarial_images)

        y1, f1 = model_classify(clean_images, need_feature=True)
        y2, f2 = model_classify(d_adversarial_images, need_feature=True)

        # Freeze the clean-image targets: only the denoiser should learn.
        y1 = y1.detach()
        f1 = f1.detach()

        loss = cost(y1, y2).mean()
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        y1 = y1.max(1)[1]
        y2 = y2.max(1)[1]
        error1.update(
            torch.ne(y1.cpu(), labels.cpu()).float().sum().item() / batch_size,
            batch_size)
        error2.update(
            torch.ne(y2.cpu(), labels.cpu()).float().sum().item() / batch_size,
            batch_size)
        # diffs count only for correctly classified denoised images.
        meanD.update(
            where((y2 == labels).cpu(), diffs.float(), 0.).mean().item(),
            batch_size)

        losses.update(loss.item(), batch_size)
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            res = '\t'.join([
                'Epoch: [%d/%d]' % (epoch + 1, n_epochs),
                'Iter: [%d/%d]' % (i + 1, batch_num),
                'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                'Error_clean %.4f (%.4f)' % (error1.val, error1.avg),
                # label typo fixed: "adversaril" -> "adversarial"
                'Error_adversarial %.4f (%.4f)' % (error2.val, error2.avg),
                'meanD %.4f (%.4f)' % (meanD.val, meanD.avg)
            ])
            print(res)
    return batch_time.avg, losses.avg, error1.avg, error2.avg, meanD.avg
Ejemplo n.º 7
0
def valid_epoch(model_denoise,
                model_classify,
                valid_dataloader,
                cost,
                print_freq=40,
                batch_num=None,
                random_layer=None):
    """Validate the denoiser against a frozen classifier.

    Mirrors ``train_epoch`` without the optimizer step: adversarial images
    are denoised, both clean and denoised images are classified, and
    ``cost`` compares the two logit sets.

    Args:
        model_denoise: denoiser under evaluation (switched to eval mode).
        model_classify: frozen classifier (switched to eval mode).
        valid_dataloader: yields (clean_images, adversarial_images, labels, diffs).
        cost: loss comparing (clean_logits, denoised_logits).
        print_freq: log a progress line every ``print_freq`` batches.
        batch_num: total batch count for the log line; defaults to
            ``len(valid_dataloader)``. Previously a ``None`` default crashed
            the ``'%d' % batch_num`` formatting on the first batch.
        random_layer: optional randomization applied to each image set
            separately.

    Returns:
        (avg batch time, avg loss, clean error, adversarial error, meanD).
    """
    if batch_num is None:
        batch_num = len(valid_dataloader)

    batch_time = AverageMeter()
    losses = AverageMeter()
    error1 = AverageMeter()  # clean-image classification error
    error2 = AverageMeter()  # denoised-adversarial classification error
    meanD = AverageMeter()   # mean diff over correctly classified samples

    end = time.time()
    model_denoise.eval()
    model_classify.eval()
    # Validation only: disable autograd bookkeeping.
    with torch.no_grad():
        for i, (clean_images, adversarial_images, labels,
                diffs) in enumerate(valid_dataloader):
            clean_images, adversarial_images, labels = (
                clean_images.cuda(), adversarial_images.cuda(), labels.cuda())

            d_adversarial_images = model_denoise(adversarial_images)

            # NOTE: unlike train_epoch, the random layer is applied to each
            # image set separately here.
            if random_layer:
                clean_images = random_layer(clean_images)
                d_adversarial_images = random_layer(d_adversarial_images)

            y1, f1 = model_classify(clean_images, need_feature=True)
            y2, f2 = model_classify(d_adversarial_images, need_feature=True)

            loss = cost(y1, y2).mean()

            batch_size = labels.size(0)
            y1 = y1.max(1)[1]
            y2 = y2.max(1)[1]
            error1.update(
                torch.ne(y1.cpu(), labels.cpu()).float().sum().item() /
                batch_size, batch_size)
            error2.update(
                torch.ne(y2.cpu(), labels.cpu()).float().sum().item() /
                batch_size, batch_size)
            # diffs count only for correctly classified denoised images.
            meanD.update(
                where((y2 == labels).cpu(), diffs.float(), 0.).mean().item(),
                batch_size)

            losses.update(loss.item(), batch_size)
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                res = '\t'.join([
                    'Valid:',
                    'Iter: [%d/%d]' % (i + 1, batch_num),
                    'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                    'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                    'Error_clean %.4f (%.4f)' % (error1.val, error1.avg),
                    # label typo fixed: "adversaril" -> "adversarial"
                    'Error_adversarial %.4f (%.4f)' % (error2.val, error2.avg),
                    'meanD %.4f (%.4f)' % (meanD.val, meanD.avg)
                ])
                print(res)
    return batch_time.avg, losses.avg, error1.avg, error2.avg, meanD.avg