def PGD(model,loss,data,target,eps=0.1,eps_iter=0.1,bounds=[],steps=1):
    """
    Projected Gradient Descent (PGD) l_inf attack.

    model    : network under attack; must be in eval mode
    loss     : loss used for training
    data     : input to network
    target   : ground truth label corresponding to data
    eps      : maximum perturbation strength added to image
    eps_iter : per-step perturbation strength, rescaled per channel via bounds
    bounds   : per-channel value ranges; must support 2-D indexing like a
               (C, 2) numpy array of (min, max) pairs -- the default [] is a
               placeholder and will fail if not overridden by the caller
    steps    : number of attack iterations

    Returns data + noise where noise is clipped to [-eps, eps].
    """
    # Fail loudly in training mode. The original code used
    # `assert 'Model is in  training mode'`, which asserts a non-empty
    # string and therefore can never fire; raise explicitly instead.
    if model.training:
        raise AssertionError('Model is in  training mode')
    tar = Variable(target.cuda())
    data = data.cuda()
    B,C,H,W = data.size()
    # random start inside the eps-ball
    noise  = torch.FloatTensor(np.random.uniform(-eps,eps,(B,C,H,W))).cuda()
    noise  = torch.clamp(noise,-eps,eps)
    for step in range(steps):
        # convert data and corresponding into cuda variable
        img = data + noise
        img = Variable(img,requires_grad=True)
        # make gradient of img to zeros
        zero_gradients(img)
        # forward pass
        out  = model(img)
        # compute loss using true label
        cost = loss(out,tar)
        # backward pass
        cost.backward()
        # gradient-sign direction of the loss w.r.t. the input
        per =  torch.sign(img.grad.data)
        # rescale the 0-1 eps_iter to each channel's own value range
        per[:,0,:,:] = (eps_iter * (bounds[0,1] - bounds[0,0])) * per[:,0,:,:]
        if(per.size(1)>1):
            per[:,1,:,:] = (eps_iter * (bounds[1,1] - bounds[1,0])) * per[:,1,:,:]
            per[:,2,:,:] = (eps_iter * (bounds[2,1] - bounds[2,0])) * per[:,2,:,:]
        # gradient ascent step
        adv = img.data + per.cuda()
        # clip each channel back into its valid value range
        img.requires_grad =False
        img[:,0,:,:] = torch.clamp(adv[:,0,:,:],bounds[0,0],bounds[0,1])
        if(per.size(1)>1):
            img[:,1,:,:] = torch.clamp(adv[:,1,:,:],bounds[1,0],bounds[1,1])
            img[:,2,:,:] = torch.clamp(adv[:,2,:,:],bounds[2,0],bounds[2,1])
        img = img.data
        # project the accumulated perturbation back into the eps-ball
        noise = img - data
        noise  = torch.clamp(noise,-eps,eps)
    img = data + noise
    return img
# Exemple #2
    def get_diff_logits_grads_batch_targeted(self, imgs, la, la_target):
        """Return per-image logit gaps (target class minus true class) and
        their input gradients, each with a singleton middle dimension."""
        batch_idx = torch.arange(imgs.shape[0])
        x = imgs.clone().requires_grad_()
        with torch.enable_grad():
            logits = self.model(x)
            gap = logits[batch_idx, la_target] - logits[batch_idx, la]
            total = gap.sum()

        zero_gradients(x)
        total.backward()
        df = gap.detach().unsqueeze(1)
        dg = x.grad.data.unsqueeze(1)

        return df, dg
# Exemple #3
def B(normalized_my_tensor, model, labels):
    # Iterative FGSM-style attack: 5 steps of size 0.03, perturbation clipped
    # to [-0.3, 0.3]; then classifies and displays the adversarial image.
    # NOTE(review): no .backward() appears here -- this relies on calc_loss()
    # running the backward pass that populates image_var.grad; confirm.
    image_var = Variable(normalized_my_tensor, requires_grad=True)
    image_var.data = normalized_my_tensor
    for k in range(5):
        zero_gradients(image_var)
        pred_vector1 = model(image_var)
        calc_loss(pred_vector1, 466)  # 466: presumably a fixed class id -- verify
        noise = image_var.data - (
            0.03 * torch.sign(image_var.grad.data)) - normalized_my_tensor
        noise = torch.clamp(noise, -0.3, 0.3)
        image_var.data = normalized_my_tensor + noise
    pred_vector2 = model(image_var)
    classify(pred_vector2, labels, "b")
    show_img(image_var, 2, "b")
# Exemple #4
def FGSM_Attack_step(model,loss,image,target,eps=0.1,bounds=[0,1], steps=30):
    """Iterative FGSM: apply `steps` sign-gradient ascent updates of size
    eps/steps, clamping to `bounds` after every step. Returns the
    adversarial image."""
    assert (not(model.training)), 'Model should be in eval mode'
    labels = Variable(target.cuda())
    adv = image.cuda()
    step_size = eps/steps
    for _ in range(steps):
        adv = Variable(adv, requires_grad=True)
        zero_gradients(adv)
        cost = loss(model(adv), labels)
        cost.backward()
        stepped = adv.data + (step_size * torch.sign(adv.grad.data)).cuda()
        adv = torch.clamp(stepped, bounds[0], bounds[1])
    return adv
# Exemple #5
def compute_grad_matrix2(x, fx):
    """
    Compute the Jacobian of fx w.r.t. x, one output class at a time.

    x  : leaf tensor with requires_grad=True, shape (batch, *dims)
    fx : output computed from x, shape (batch, num_classes); its graph must
         allow repeated backward passes (it is retained across iterations).

    Returns a tensor of shape (num_classes, batch, *dims) where entry [i]
    holds d fx[:, i] / d x.
    """
    assert x.requires_grad
    num_classes = fx.shape[1]
    jacobian = torch.zeros(num_classes, *x.shape, device=x.device)
    grad_output = torch.zeros(*fx.shape, device=x.device)
    for i in range(num_classes):
        # zero_gradients() was removed from torch.autograd.gradcheck in
        # PyTorch >= 1.9; zero the accumulated gradient directly instead.
        if x.grad is not None:
            x.grad.detach_()
            x.grad.zero_()
        grad_output.zero_()
        grad_output[:, i] = 1
        # retain_graph so the same graph can be reused for every class
        fx.backward(grad_output, retain_graph=True)
        jacobian[i] = x.grad.data
    # shape: (n_classes, n_samples, *dims); callers may reshape as needed
    return jacobian
# Exemple #6
def compute_jacobian(inputs, output):
    """
    Compute the Jacobian of `output` w.r.t. `inputs`, class by class.

    inputs : leaf tensor with requires_grad=True, shape (batch, *dims)
    output : tensor computed from inputs, shape (batch, num_classes);
             the graph is retained across the per-class backward passes.

    Returns a tensor of shape (batch, num_classes, *dims).
    """
    assert inputs.requires_grad
    num_classes = output.size()[1]
    jacobian = torch.zeros(num_classes, *inputs.size())
    grad_output = torch.zeros(*output.size())
    if inputs.is_cuda:
        grad_output = grad_output.cuda()
        jacobian = jacobian.cuda()
    for i in range(num_classes):
        # zero_gradients() was removed from torch.autograd.gradcheck in
        # PyTorch >= 1.9; zero the accumulated gradient directly instead.
        if inputs.grad is not None:
            inputs.grad.detach_()
            inputs.grad.zero_()
        grad_output.zero_()
        grad_output[:, i] = 1
        output.backward(grad_output, retain_graph=True)
        jacobian[i] = inputs.grad.data
    # move the class axis after the batch axis
    return torch.transpose(jacobian, dim0=0, dim1=1)
	def __call__(self, x, t):
		"""Average the class-t input gradient of the model over N scaled
		copies of x (x * n/N for n = 0..N-1), integrated-gradients style.

		x : input tensor (the final mean collapses the batch dimension)
		t : scalar label tensor (t.item() is taken)
		Returns a detached (1, *x.shape[1:]) averaged gradient tensor.
		"""
		grad = []
		for n in range(self.N):
			# scale the input along the straight path from 0 to x
			x_ = x.detach() * n / self.N
			x_.requires_grad = True
			y = self.model(x_)

			zero_gradients(x_)
			self.model.zero_grad()
			# one-hot seed vector selecting class t for the backward pass
			one_hot_label = int2onehot(t.item(), y.shape[-1]).to(self.device)

			y.backward(gradient=one_hot_label)
			grad.append(x_.grad.data.detach())

		# average the gradients over all N interpolation points
		grad = torch.cat(grad, dim=0).mean(dim=0, keepdim=True)
		return grad.data.detach()
def compute_jacobian(model, input):
    """
    Compute the Jacobian of model(input) w.r.t. the flattened input.

    model : callable mapping input -> (batch, num_classes) scores
    input : leaf tensor with requires_grad=True (single sample expected:
            the per-class gradient is flattened to one row)

    Returns a (num_classes, num_features) CPU tensor where row i is
    d output[:, i] / d input, flattened.
    """
    output = model(input)

    num_features = int(np.prod(input.shape[1:]))
    jacobian = torch.zeros([output.size()[1], num_features])
    mask = torch.zeros(output.size())  # chooses the derivative to be calculated
    for i in range(output.size()[1]):
        mask[:, i] = 1
        # zero_gradients() was removed from torch.autograd.gradcheck in
        # PyTorch >= 1.9; zero the accumulated gradient directly instead.
        if input.grad is not None:
            input.grad.detach_()
            input.grad.zero_()
        output.backward(mask, retain_graph=True)
        # copy the derivative to the target place (use the public .grad
        # accessor rather than the private ._grad attribute)
        jacobian[i] = input.grad.squeeze().view(-1, num_features).clone()
        mask[:, i] = 0  # reset

    return jacobian
# Exemple #9
def compute_contribution_derivatives(inputs, outputs):
    """
    Backpropagate an all-ones seed through `outputs` and return the input
    gradient of the first batch element.

    :param inputs: Tensor: batch_size X max_seq_length X num_features,
        a leaf with requires_grad=True
    :param outputs: Tensor computed from inputs
    :return: Tensor: max_seq_length X num_features -- d(sum outputs)/d inputs
        for batch element 0 only (no per-class jacobian is built here,
        despite the shape suggested by the original docstring)
    """
    assert inputs.requires_grad

    grad_output = torch.ones(*outputs.size())
    if inputs.is_cuda:
        grad_output = grad_output.cuda()

    # zero_gradients() was removed from torch.autograd.gradcheck in
    # PyTorch >= 1.9; clear any accumulated gradient directly instead.
    if inputs.grad is not None:
        inputs.grad.detach_()
        inputs.grad.zero_()
    outputs.backward(grad_output)

    return inputs.grad.data[0]
def non_targeted_attack(img_tensor):
    """Untargeted iterative FGSM using the model's current prediction as the
    label whose loss is maximized; the perturbation is clipped to [-eps, eps]
    and the image to [0, 1]. Returns (adversarial image, perturbation)."""
    y = Variable(torch.zeros(1, 1))
    x = Variable(img_tensor, requires_grad=True)
    for _ in range(steps):
        zero_gradients(x)
        out = model(x)
        # attack the model's own current answer
        y.data = out.data.max(1)[1]
        cost = loss(out, y)
        cost.backward()
        stepped = x.data + step_alpha * torch.sign(x.grad.data)
        adv = torch.clamp(stepped - img_tensor, -eps, eps)
        result = torch.clamp(img_tensor + adv, 0.0, 1.0)
        x.data = result
    return result, adv
def targeted_attack(img, label):
    """Targeted iterative FGSM: descend the loss toward `label` for `steps`
    iterations; the perturbation is clipped to [-eps, eps] and the image to
    [0, 1]. Returns (adversarial image, perturbation)."""
    y = Variable(label.long())
    x = Variable(img, requires_grad=True)
    for _ in range(steps):
        zero_gradients(x)
        out = white_model(torch.unsqueeze(x, dim=0))
        cost = loss(out, torch.unsqueeze(y, dim=0))
        cost.backward()
        descent = step_alpha * torch.sign(x.grad.data)
        adv = torch.clamp(x.data - descent - img, -eps, eps)
        result = torch.clamp(img + adv, 0.0, 1.0)
        x.data = result
    return result, adv
# Exemple #12
def createIterativeAdversarial(image, output, x_pred, x_pred_prob):
    """Iterative targeted attack toward class 35 (alpha=0.025, 11 iterations,
    total perturbation clipped to +/- epsilon); visualizes progress every 5
    iterations.

    image       : input tensor; must require grad (image.grad is read) and is
                  mutated in place via image.data
    output      : model output for `image`; its graph is reused across
                  iterations via retain_graph=True -- NOTE(review): the loss
                  is recomputed from this fixed `output`, so gradients do not
                  reflect the updated image; confirm this is intended
    x_pred      : clean-image prediction, forwarded to visualize()
    x_pred_prob : clean-image confidence, forwarded to visualize()
    """
    #    test_image = np.random.randint(256, size=(224, 224, 3), dtype=np.uint8)
    #    plt.imshow(test_image)
    #    plt.show()

    #    image = torch.randn((3, 224, 224), requires_grad = True).to(device)
    #    image = image.unsqueeze(0)
    #    model.forward(image)
    #    print(test_label)
    #    print(image.data)
    #    loss = torch.nn.CrossEntropyLoss()
    #    loss_cal2 = loss(output, test_label)
    #    print(image.grad.data)
    image_temp = image.clone()
    y_target = 35
    y_target = torch.tensor([y_target], requires_grad=False)
    print("targetlabel for the attack: {}".format(y_target.item()))
    y_target = y_target.to(device)

    epsilons = [0.5]
    #    epsilons = [0.5]
    num_iteration = 11
    alpha = 0.025

    # x_adversarial.data = image
    for epsilon in epsilons:
        for iteration in range(num_iteration):
            zero_gradients(image)
            loss = torch.nn.CrossEntropyLoss()
            loss_cal2 = loss(output, y_target)
            loss_cal2.backward(retain_graph=True)
            # targeted step: move against the gradient toward y_target
            x_grad = alpha * torch.sign(image.grad.data)
            adv_temp = image.data - x_grad
            total_grad = adv_temp - image
            total_grad = torch.clamp(total_grad, -epsilon, epsilon)
            x_adversarial = image + total_grad
            image.data = x_adversarial

            if (iteration % 5 == 0):
                # periodically re-classify and visualize the current image
                output_adv = model.forward(Variable(image))
                x_adv_pred = torch.max(output_adv.data, 1)[1][0]
                op_adv_probs = F.softmax(output_adv, dim=1)
                x_adv_pred_prob = torch.max(op_adv_probs.data, 1)[0][0]
                visualize(image_temp, image.data, total_grad, epsilon, x_pred,
                          x_adv_pred, x_pred_prob, x_adv_pred_prob, iteration,
                          alpha)
# Exemple #13
    def run(self, model, input, target, batch_idx=0):
        """Targeted iterative attack: push each sample's target-class score
        above the model's original prediction, scaling every sample's step by
        its remaining score gap and stopping once the whole batch is fooled.

        model  : network under attack
        input  : clean input batch
        target : target class labels
        Returns the final adversarial batch, clipped to [-2.5, 2.5]
        (presumably a normalized pixel range -- confirm against the caller).
        """
        input_var = autograd.Variable(input, requires_grad=True)
        target_var = autograd.Variable(target)
        GT_var = autograd.Variable(target)
        # NOTE(review): eps is read but never used below.
        eps = self.eps

        step = 0
        while step < self.num_steps:
            zero_gradients(input_var)
            output = model(input_var)

            # on the first step, record the model's own predictions as the
            # "ground truth" class to move away from
            if not step:
                GT_var.data = output.data.max(1)[1]

            score = output

            score_GT = score.gather(1, GT_var.unsqueeze(1))
            score_target = score.gather(1, target_var.unsqueeze(1))

            # maximize target score relative to the original prediction
            loss = (score_target - score_GT).sum()
            loss.backward()

            # per-sample step size: zero once a sample stops predicting GT
            step_alpha = self.step_alpha * (GT_var.data
                                            == output.data.max(1)[1]).float()
            step_alpha = step_alpha.unsqueeze(1).unsqueeze(1).unsqueeze(1)

            # every sample already fooled -- done early
            if step_alpha.sum() == 0:
                break

            # scale by the remaining score gap, normalized by the grad l2 norm
            pert = ((score_GT.data -
                     score_target.data).unsqueeze(1).unsqueeze(1))
            normed_grad = step_alpha * (pert + 1e-4) * input_var.grad.data / (
                l2_norm(input_var.grad.data))

            # perturb current input image by normalized and scaled gradient
            overshoot = 0.0
            step_adv = input_var.data + (1 + overshoot) * normed_grad

            total_adv = step_adv - input

            # apply total adversarial perturbation to original image and clip to valid pixel range
            input_adv = input + total_adv
            input_adv = torch.clamp(input_adv, -2.5, 2.5)
            input_var.data = input_adv
            step += 1

        return input_adv
# Exemple #14
def compute_jacobian_and_bias(inputs, net):
    """
    Compute the Jacobian of net(inputs) w.r.t. a single 2-D input sample.

    inputs : leaf float tensor, shape (1, n1, n2); requires_grad is set here,
             so it must not already be part of a graph
    net    : callable mapping inputs -> (1, out_n) outputs (graph retained
             across the per-output backward passes)

    Returns (jacobian, inputs, outputs) as numpy arrays, where jacobian has
    shape (n1, n2, out_n).
    """
    inputs.requires_grad = True
    outputs = net(inputs)

    inp_n1 = inputs.shape[-2]
    inp_n2 = inputs.shape[-1]
    out_n = outputs.shape[-1]

    jacobian = torch.zeros([inp_n1, inp_n2, out_n])

    for i in range(out_n):
        # zero_gradients() was removed from torch.autograd.gradcheck in
        # PyTorch >= 1.9; zero the accumulated gradient directly instead.
        if inputs.grad is not None:
            inputs.grad.detach_()
            inputs.grad.zero_()
        outputs[0, i].backward(retain_graph=True)
        jacobian[:, :, i] = inputs.grad.data[0]

    return jacobian.numpy(), inputs.detach().numpy(), outputs.detach().numpy()
def gen_adv(images, targets, model, outnames):
    """
    Stage the FGSM white box attack on first model to generate black box
    Adversarial Examples on the second model.

    images   : list of input images (each run through transform_input)
    targets  : per-image target class ids for the targeted attack
    model    : white-box model used to compute gradients
    outnames : passed through unchanged in the returned tuple

    Returns (imagetensor, variabledata, grads, epsilons, xpreds, xadvpred,
    xpredprob, xadvpredprob, outnames) with one entry per input image.
    """
    imagetensor, variabledata, grads, epsilons, xpreds, xadvpred, xpredprob, xadvpredprob = [], [], [], [], [], [], [], []

    # Similar to fundamental training loops of any pytorch training function
    for i in range(len(images)):
        label_idx, x_pred, x_pred_prob, labels = get_label(
            model, images[i], True)
        model.eval()
        y_target = Variable(torch.LongTensor([targets[i]]),
                            requires_grad=False)
        epsilon = 0.25
        num_steps = 5
        alpha = 0.025
        image_tensor = transform_input(images[i])
        img_variable = Variable(image_tensor, requires_grad=True)

        total_grad = 0
        # inner loop variable renamed from `i`: the original shadowed the
        # outer image index, which is bug-prone for any code added after
        # this loop.
        for step in range(num_steps):
            zero_gradients(img_variable)
            output = model.forward(img_variable)
            loss = torch.nn.CrossEntropyLoss()
            loss_cal = loss(output, y_target)
            loss_cal.backward()
            # targeted step: move against the gradient toward y_target
            x_grad = alpha * torch.sign(img_variable.grad.data)
            adv_temp = img_variable.data - x_grad
            total_grad = adv_temp - image_tensor
            total_grad = torch.clamp(total_grad, -epsilon, epsilon)
            x_adv = image_tensor + total_grad
            img_variable.data = x_adv
        output_adv = model.forward(img_variable)
        x_adv_pred = labels[torch.max(output_adv.data, 1)[1][0].item()]
        output_adv_probs = F.softmax(output_adv, dim=1)
        x_adv_pred_prob = round(
            (torch.max(output_adv_probs.data, 1)[0][0].item()) * 100, 4)
        imagetensor.append(image_tensor)
        variabledata.append(img_variable.data)
        grads.append(total_grad)
        epsilons.append(epsilon)
        xpreds.append(x_pred)
        xadvpred.append(x_adv_pred)
        xpredprob.append(x_pred_prob)
        xadvpredprob.append(x_adv_pred_prob)
    return imagetensor, variabledata, grads, epsilons, xpreds, xadvpred, xpredprob, xadvpredprob, outnames
# Exemple #16
    def _find_z(self, inputs, targets, h):
        """Find the regularizer direction: the sign of the loss gradient at
        `inputs`, normalized per sample and scaled by h. Returns (z, the
        gradient's norm)."""
        inputs.requires_grad_()
        loss_z = self.criterion(self.net.eval()(inputs), targets)
        loss_z.backward()
        raw_grad = inputs.grad.data + 0.0
        norm_grad = raw_grad.norm().item()
        direction = torch.sign(raw_grad).detach() + 0.
        flat_norm = direction.reshape(direction.size(0), -1).norm(dim=1)
        z = 1. * (h) * (direction + 1e-7) / (
            flat_norm[:, None, None, None] + 1e-7)
        zero_gradients(inputs)
        self.net.zero_grad()

        return z, norm_grad
def GAMA_FW(model,
            data,
            target,
            eps,
            gamma=0.5,
            steps=100,
            SCHED=[60, 85],
            drop=5,
            w_reg=50,
            lin=25,
            twobranch=False):
    """GAMA-FW attack: Frank-Wolfe-style margin-maximization attack with a
    squared softmax-difference relaxation that is linearly decayed to zero
    over the first `lin` steps.

    model     : network under attack (if twobranch, its first output is used)
    data      : clean input batch, assumed in [0, 1]
    target    : ground-truth labels
    eps       : l_inf perturbation radius
    gamma     : FW mixing coefficient; divided by `drop` at each step in SCHED
    steps     : number of attack iterations
    w_reg     : initial weight of the relaxation term
    lin       : number of initial steps that use the relaxed objective
    twobranch : model returns a tuple; use element 0
    Returns the detached adversarial batch data + delta.
    """
    tar = Variable(target.cuda())
    data = data.cuda()
    B, C, H, W = data.size()
    # random sign initialization on the surface of the eps-ball
    delta = torch.rand_like(data).cuda()
    delta = eps * torch.sign(delta - 0.5)
    delta.requires_grad = True
    # reference image (initial perturbed input) for the relaxation term
    orig_img = data + delta
    orig_img = Variable(orig_img, requires_grad=False)
    WREG = w_reg
    for step in range(steps):
        if step in SCHED:
            gamma /= drop
        delta = Variable(delta, requires_grad=True)
        # make gradient of img to zeros
        zero_gradients(delta)
        if step < lin:
            # relaxed phase: margin loss plus decaying softmax-distance term
            out_all = model(normalize(torch.cat((orig_img, data + delta), 0)))
            out_all = out_all[0] if twobranch else out_all
            P_out = nn.Softmax(dim=1)(out_all[:B, :])
            Q_out = nn.Softmax(dim=1)(out_all[B:, :])
            cost = max_margin_loss(Q_out, tar) + WREG * (
                (Q_out - P_out)**2).sum(1).mean(0)
            WREG -= w_reg / lin
        else:
            # plain margin-maximization phase
            out = model(normalize(data + delta))
            out = out[0] if twobranch else out
            Q_out = nn.Softmax(dim=1)(out)
            cost = max_margin_loss(Q_out, tar)
        cost.backward()
        # Frank-Wolfe: move toward the eps-ball vertex given by the grad sign
        delta.grad = torch.sign(delta.grad) * eps
        delta = (1 - gamma) * delta + gamma * delta.grad
        # keep data + delta a valid image, and delta inside the eps-ball
        delta = (data + delta).clamp(0.0, 1.0) - data
        delta.data.clamp_(-eps, eps)
    return data + delta.detach()
# Exemple #18
def targeted_attack(img, label):
    """Targeted iterative FGSM on CPU tensors: descend the loss toward
    `label`; the perturbation is clipped to [-eps, eps] and the image to
    [0, 1]. Returns (adversarial image, perturbation), both on CPU."""
    img = img.cpu()
    y = Variable(torch.Tensor([label]).long().cpu())
    x = Variable(img, requires_grad=True)
    for _ in range(steps):
        zero_gradients(x)
        cost = loss(model(x), y)
        cost.backward()
        descent = step_alpha * torch.sign(x.grad.data)
        adv = torch.clamp(x.data - descent - img, -eps, eps)
        result = torch.clamp(img + adv, 0.0, 1.0)
        x.data = result
    return result.cpu(), adv.cpu()
# Exemple #19
def RFGSM_Attack_step(model,loss,image,target,eps=0.1,eps_iter=0.01,bounds=[0,1],steps=30):
    """R-FGSM: take one random sign step of size eps_iter, then run `steps`
    FGSM iterations of size (eps - eps_iter)/steps, clamping to `bounds`
    after each iteration. Returns the adversarial image."""
    assert (not(model.training)), 'Model should be in eval mode'
    labels = Variable(target.cuda())
    B,C,H,W = image.size()
    step_size = (eps - eps_iter) / steps
    # random initialization step before the iterative attack
    rand = torch.FloatTensor(np.random.normal(loc=0.0, scale=1.0,size=(B,C,H,W)))
    adv = torch.clamp(image.cuda() + eps_iter * torch.sign(rand.cuda()), 0, 1)
    for _ in range(steps):
        adv = Variable(adv, requires_grad=True)
        zero_gradients(adv)
        cost = loss(model(adv), labels)
        cost.backward()
        stepped = adv.data + (step_size * torch.sign(adv.grad.data)).cuda()
        adv = torch.clamp(stepped, bounds[0], bounds[1])
    return adv
# Exemple #20
def compute_jacobian(model, image, num_features):
    """
    Compute the Jacobian of model(image) w.r.t. the flattened image.

    model        : callable mapping image -> (batch, classes) scores
    image        : leaf tensor with requires_grad=True (single sample)
    num_features : flattened input size (e.g. 784 for MNIST)

    Returns a (classes, num_features) CPU tensor where row i is
    d output[:, i] / d image, flattened.
    """
    output = model(image)
    classes = output.size()[1]
    # Initialize the Jacobian matrix (e.g. a 10x784 matrix for MNIST).
    jacobian = torch.zeros([classes, num_features])
    mask = torch.zeros(
        output.size())  # chooses the derivative to be calculated
    for i in range(classes):
        mask[:, i] = 1
        # zero_gradients() was removed from torch.autograd.gradcheck in
        # PyTorch >= 1.9; zero the accumulated gradient directly instead.
        if image.grad is not None:
            image.grad.detach_()
            image.grad.zero_()
        # retain_graph=True lets the next backward pass reuse the graph;
        # seed on the output's own device (the original referenced an
        # undefined module-level `device`)
        output.backward(mask.to(output.device), retain_graph=True)
        # copy the derivative to the target place (public .grad, not ._grad)
        jacobian[i] = image.grad.squeeze().view(-1, num_features).clone()
        mask[:, i] = 0  # reset

    return jacobian
# Exemple #21
def follow_gradient(img, net, alpha):
    """Walk 10 FGSM steps of size alpha from img, increasing the BCE loss
    against the network's initial binary prediction, and record the
    (image, score) pair after each step.

    Returns (list of boundary points, initial thresholded prediction)."""
    boundary = []
    criterion = nn.BCELoss()
    y = Variable(torch.FloatTensor(1, 1))
    if torch.cuda.is_available():
        img = img.cuda()
        y = y.cuda()

    x = Variable(img, requires_grad=True)
    zero_gradients(x)

    # initial prediction, thresholded at 0.5
    out = net(x)
    pred = (out.data[0] > 0.5).cpu()
    boundary.append((img.cpu().squeeze(0).numpy(), out.data[0, 0]))

    # the attack label is the model's own first answer
    y.data.copy_(pred)

    for _ in range(10):
        err = criterion(out, y)
        err.backward()
        # signed ascent step, kept inside the valid pixel range
        stepped = x.data + alpha * torch.sign(x.grad.data)
        stepped.clamp_(0., 1.)
        x.data.copy_(stepped)
        # re-evaluate and record the new boundary point
        zero_gradients(x)
        out = net(x)
        boundary.append((stepped.cpu().squeeze(0).numpy(), out.data[0, 0]))

    return boundary, pred
    def generate(self, input_ids, input_mask, y, model):
        """Generate an adversarially interpolated (embedding, label) pair.

        Perturbs the input embeddings by one signed-gradient step of size
        self.epsilon_x to minimize the cosine distance between this batch's
        pooled features and those of the reversed batch, and mixes labels
        with the reversed batch by self.epsilon_y.

        input_ids  : token id batch
        input_mask : attention mask for the batch
        y          : label batch (mixed linearly, so soft labels expected)
        model      : transformer exposing get_input_embeddings() and hidden
                     states at output index [1][-1] -- confirm against the
                     model's return signature
        Returns (x_prime, y_prime), both detached; model left in train mode.
        """
        model.eval()
        with torch.no_grad():
            # DataParallel wraps the real model in .module
            if torch.cuda.device_count() > 1:
                embedding = model.module.get_input_embeddings()(input_ids)
            else:
                embedding = model.get_input_embeddings()(input_ids)

        x = embedding.detach()

        # pair each sample with its mirror in the reversed batch
        inv_index = torch.arange(x.size(0) - 1, -1, -1).long()
        x_tilde = x[inv_index, :].detach()
        y_tilde = y[inv_index, :]
        # random start inside the epsilon_x-ball
        x_init = x.detach() + torch.zeros_like(x).uniform_(
            -self.epsilon_x, self.epsilon_x)

        x_init.requires_grad_()
        zero_gradients(x_init)
        if x_init.grad is not None:
            x_init.grad.data.fill_(0)

        # pooled features of the perturbed batch (mean over the token axis)
        fea_b = model(inputs_embeds=x_init,
                      token_type_ids=None,
                      attention_mask=input_mask)[1][-1]
        fea_b = torch.mean(fea_b, 1)
        with torch.no_grad():
            # target features from the mirrored batch (no grad needed)
            fea_t = model(inputs_embeds=x_tilde,
                          token_type_ids=None,
                          attention_mask=input_mask)[1][-1]
            fea_t = torch.mean(fea_t, 1)

        Dx = cos_dist(fea_b, fea_t)
        model.zero_grad()
        if torch.cuda.device_count() > 1:
            Dx = Dx.mean()
        Dx.backward()

        # signed descent step on the embeddings, projected back to the ball
        x_prime = x_init.data - self.epsilon_x * torch.sign(x_init.grad.data)
        x_prime = torch.min(torch.max(x_prime, embedding - self.epsilon_x),
                            embedding + self.epsilon_x)

        # mix labels with the mirrored batch
        y_prime = (1 - self.epsilon_y) * y + self.epsilon_y * y_tilde
        model.train()
        return x_prime.detach(), y_prime.detach()
# Exemple #23
def fgsm(img):
    """Single-step FGSM (unit-size sign step) against the model's own
    prediction; the perturbation is clipped to [-eps, eps] and the image to
    [0, 1]. Returns (adversarial image, perturbation), both on CPU."""
    img = img.cpu()
    y = Variable(torch.zeros(1, 1).cpu())
    x = Variable(img, requires_grad=True)
    zero_gradients(x)
    out = model(x)
    # attack the model's own answer
    y.data = out.data.max(1)[1]
    cost = loss(out, y)
    cost.backward()
    # unit step: the eps scaling is left commented out in the original
    signed = torch.sign(x.grad.data)
    adv = torch.clamp(x.data + signed - img, -eps, eps)
    result = torch.clamp(img + adv, 0.0, 1.0)

    return result.cpu(), adv.cpu()
def generate_perturbed_inputs(basic_net,
                              config,
                              inputs,
                              label,
                              loss_func=None):
    """
    Run a PGD-style l_inf attack on `inputs` and return the perturbed batch.

    basic_net : network under attack (a module: .zero_grad() is called on it)
    config    : dict with 'random_start', 'step_size', 'epsilon', 'num_steps'
    inputs    : clean input batch, assumed in [0, 1]
    label     : ground-truth labels
    loss_func : per-sample loss; defaults to CrossEntropyLoss(reduction='none')

    Returns the adversarial inputs, projected to the epsilon-ball around the
    clean inputs and clipped to [0, 1].
    """
    rand = config['random_start']
    step_size = config['step_size']
    epsilon = config['epsilon']
    num_steps = config['num_steps']
    loss_func = nn.CrossEntropyLoss(
        reduction='none') if loss_func is None else loss_func

    step_sign = 1.0

    x = inputs.detach()
    if rand:
        # random start inside the epsilon-ball
        x = x + torch.zeros_like(x).uniform_(-epsilon, epsilon)
    # (the original also kept an unused x_org copy and an unused loss_array)

    for i in range(num_steps):
        x.requires_grad_(True)
        # zero_gradients() was removed from torch.autograd.gradcheck in
        # PyTorch >= 1.9; clearing .grad directly is equivalent here.
        if x.grad is not None:
            x.grad.data.fill_(0)

        output = basic_net(x)

        loss = loss_func(output.float(), label.long()).mean()

        basic_net.zero_grad()
        loss.backward()

        # l_inf ascent step, then project to the epsilon-ball and to [0, 1]
        x_adv = x.data + step_sign * step_size * torch.sign(x.grad.data)
        x_adv = torch.min(torch.max(x_adv, inputs - epsilon), inputs + epsilon)
        x_adv = torch.clamp(x_adv, 0.0, 1.0)
        x = Variable(x_adv)

    return x
    def calc_approx_of_jacobina(self, inputs, grad_output, output,
                                use_fixed_model):
        """Approximate the Jacobian of `output` w.r.t. `inputs` by one
        vector-Jacobian product seeded at each sample's argmax output unit.

        inputs      : tensor the gradient is taken w.r.t.
        grad_output : seed tensor, same shape as output; the per-sample max
                      position is set to 1 in place -- NOTE(review): other
                      entries are left untouched, so this assumes the caller
                      passes a zeroed tensor; confirm
        output      : model output (graph retained for reuse)
        use_fixed_model : when False, build a graph through the gradient
                      itself (create_graph) so it can be differentiated again
        Returns the input gradient from torch.autograd.grad.
        """
        zero_gradients(inputs)

        output_col = output.view(output.shape[0], -1)
        grad_output_col = grad_output.view(grad_output.shape[0], -1)
        values, indices = output_col.max(dim=1)

        # mark each sample's top output unit in the backward seed
        for i in range(0, self.args.batch_size):
            grad_output_col[i, indices[i]] = 1

        grad_output = grad_output_col.view(grad_output.shape)

        return torch.autograd.grad(outputs=output,
                                   inputs=inputs,
                                   grad_outputs=grad_output,
                                   only_inputs=True,
                                   retain_graph=True,
                                   create_graph=not use_fixed_model)[0]
# Exemple #26
def non_targeted_attack(img):
    """Untargeted iterative FGSM on CPU tensors, attacking the model's own
    running prediction; the perturbation is clipped to [-eps, eps] and the
    image to [0, 1]. Returns (adversarial image, perturbation), both on CPU."""
    img = img.cpu()
    y = Variable(torch.zeros(1, 1).cpu())
    x = Variable(img, requires_grad=True)
    for _ in range(steps):
        zero_gradients(x)
        out = model(x)
        # attack the model's own current answer
        y.data = out.data.max(1)[1]
        cost = loss(out, y)
        cost.backward()
        ascent = step_alpha * torch.sign(x.grad.data)
        adv = torch.clamp(x.data + ascent - img, -eps, eps)
        result = torch.clamp(img + adv, 0.0, 1.0)
        x.data = result
    return result.cpu(), adv.cpu()
# Exemple #27
def targeted_attack(img, steps, step_lr, eps, label):
    """Targeted iterative FGSM: descend the loss toward `label` for `steps`
    iterations of size step_lr; the perturbation is clipped to [-eps, eps]
    and the image to [0, 1]. Returns (adversarial image, perturbation),
    both moved to CPU."""
    model_0.to(device).eval()
    img = torch.from_numpy(img).to(device)
    target = torch.Tensor([label]).long().to(device)
    x = Variable(img, requires_grad=True).to(device)
    y = Variable(target).to(device)
    for _ in range(steps):
        zero_gradients(x)
        cost = loss(model_0(x), y)
        cost.backward()
        descent = step_lr * torch.sign(x.grad.data)
        adv = torch.clamp(x.data - descent - img, -eps, eps)
        result = torch.clamp(img + adv, 0.0, 1.0)
        x.data = result
    return result.cpu(), adv.cpu()
def pgd_attack(model, image_tensor, img_variable, tar_label_variable, n_steps,
               eps_max, eps_step, clip_min, clip_max, targeted, rand_init):
    """
    image_tensor: tensor which holds the clean images. 
    img_variable: Corresponding pytorch variable for image_tensor.
    tar_label_variable: Assuming targeted attack, this variable holds the targeted labels. 
    n_steps: number of attack iterations. 
    eps_max: maximum l_inf attack perturbations. 
    eps_step: l_inf attack perturbation per step
    clip_min/clip_max: valid pixel value range for the final image.
    targeted: forwarded to cal_loss (presumably flips the loss sign -- confirm).
    rand_init: when true, start from a random point inside the eps_max-ball.

    Returns the best adversarial batch found across iterations, as tracked
    per sample by track_best().
    """

    best_losses = None
    best_adv_x = None

    if rand_init:
        img_variable = rand_init_linf(img_variable, eps_max)

    output = model.forward(img_variable)
    for i in range(n_steps):
        zero_gradients(img_variable)
        output = model.forward(img_variable)
        loss_cal, losses_cal = cal_loss(output, tar_label_variable, targeted)
        best_losses, best_adv_x = track_best(best_losses, best_adv_x,
                                             losses_cal, img_variable)

        # NOTE(review): cal_loss is evaluated a second time with identical
        # arguments -- looks redundant; confirm before removing.
        loss_cal, losses_cal = cal_loss(output, tar_label_variable, targeted)
        loss_cal.backward()
        # negative sign step scaled by eps_step
        x_grad = -1 * eps_step * torch.sign(img_variable.grad.data)
        adv_temp = img_variable.data + x_grad
        total_grad = adv_temp - image_tensor
        total_grad = torch.clamp(total_grad, -eps_max, eps_max)
        x_adv = image_tensor + total_grad
        # project to the eps ball around the clean image, then to valid range
        x_adv = torch.clamp(
            torch.clamp(x_adv - image_tensor, -1 * eps_max, eps_max) +
            image_tensor, clip_min, clip_max)
        img_variable.data = x_adv

    best_losses, best_adv_x = track_best(best_losses, best_adv_x, losses_cal,
                                         img_variable)
    #print("peturbation= {}".format(
    #    np.max(np.abs(np.array(x_adv)-np.array(image_tensor)))))
    return best_adv_x
	def __call__(self, x, t):
		"""SmoothGrad-style saliency: average the class-t input gradient over
		num_samples noisy copies of x (Gaussian noise with scale
		noise_level * (x.max() - x.min())).

		x : input tensor (batch of 1 is assumed when repeating labels)
		t : scalar label tensor (t.item() is taken)
		Returns a (1, *x.shape[1:]) averaged gradient tensor.
		"""
		sigma = self.noise_level * (x.max() - x.min())
		# NOTE(review): smooth_gradient is initialized but never used below.
		smooth_gradient = torch.zeros_like(x).to(self.device)

		# build a batch of noisy copies of x
		noisy_x = []
		for i in range(self.num_samples):
			noisy_x.append(self._add_noise(x, sigma))
		noisy_x = torch.cat(noisy_x, dim=0).to(self.device)

		noisy_x.requires_grad = True		
		y = self.model(noisy_x)

		zero_gradients(noisy_x)
		# one-hot seed for class t, repeated for every noisy copy
		one_hot_label = int2onehot(t.item(), y.shape[-1]).to(self.device)
		one_hot_label = one_hot_label.repeat(self.num_samples, 1)

		y.backward(gradient=one_hot_label, retain_graph=True)
		# average the gradients over the noisy copies
		grad = torch.mean(noisy_x.grad.data.detach(), dim=0, keepdim=True)

		return grad
# Exemple #30
def adv_interp(inputs,
               y,
               base_net,
               num_classes,
               epsilon=8,
               epsilon_y=0.5,
               v_min=0,
               v_max=255):
    """Adversarial interpolation: perturb each image one signed-gradient step
    to minimize the cosine feature distance to its mirror in the reversed
    batch, and soften its label toward that mirror's complementary label
    distribution.

    inputs : image batch [batch_size, c, h, w]
    y      : one-hot label batch [batch_size, num_classes]
    Returns (x_tilde, y_tilde), both detached."""
    net = copy.deepcopy(base_net)
    x = inputs.clone()

    # pair every sample with its mirror in the reversed batch
    reversed_idx = torch.arange(x.size(0) - 1, -1, -1).long()
    x_prime = x[reversed_idx, :, :, :].detach()
    y_prime = y[reversed_idx, :]

    # random start inside the epsilon-ball
    x_init = x.detach() + torch.zeros_like(x).uniform_(-epsilon, epsilon)
    x_init.requires_grad_()
    zero_gradients(x_init)
    if x_init.grad is not None:
        x_init.grad.data.fill_(0)
    net.eval()

    fea_b = net(x_init, mode='feature')
    fea_t = net(x_prime, mode='feature')

    loss_adv = cos_dist(fea_b, fea_t)
    net.zero_grad()
    loss_adv = loss_adv.mean()
    loss_adv.backward(retain_graph=True)

    # single signed descent step, projected to the ball and the pixel range
    x_tilde = x_init.data - epsilon * torch.sign(x_init.grad.data)
    x_tilde = torch.min(torch.max(x_tilde, inputs - epsilon), inputs + epsilon)
    x_tilde = torch.clamp(x_tilde, v_min, v_max)

    # label smoothing toward the mirrored sample's complementary distribution
    y_bar_prime = (1 - y_prime) / (num_classes - 1.0)
    y_tilde = (1 - epsilon_y) * y + epsilon_y * y_bar_prime

    return x_tilde.detach(), y_tilde.detach()