Ejemplo n.º 1
0
def targeted_detection(model,
                       img,
                       dataset,
                       lr,
                       t_radius,
                       cap=200,
                       margin=20,
                       use_margin=False):
    """Count the gradient steps needed to move ``img`` off its predicted label.

    A random target label (different handling is delegated to
    ``random_label``) is drawn once, then the image is updated by plain
    gradient descent on a targeted loss until the model's prediction is no
    longer the label it assigned to the unmodified image, or until ``cap``
    steps have been taken.

    Args:
        model: Network that is called on ``transform(x, dataset=dataset)``;
            it is switched to eval mode.
        img: Input image tensor. It is cloned and moved to CUDA.
            NOTE(review): ``img`` itself is also used in the projection
            step, so it presumably already lives on the CUDA device --
            confirm against callers.
        dataset: Forwarded to ``transform`` and ``random_label``.
        lr: Step size of the manual gradient-descent update.
        t_radius: Maximum absolute per-element deviation from ``img``
            allowed after each step.
        cap: The loop stops once the step counter reaches this value.
        margin: Margin added inside the hinge loss (only used when
            ``use_margin`` is true).
        use_margin: If true, use a hinge loss between the strongest
            non-target logit and the target logit; otherwise use
            ``F.cross_entropy`` against the target label.

    Returns:
        The number of update steps that were performed.
    """
    model.eval()
    x_var = torch.autograd.Variable(img.clone().cuda(), requires_grad=True)
    true_label = model(transform(x_var.clone(), dataset=dataset)).data.max(1, keepdim=True)[1][0].item()
    # The optimizer is only used to clear x_var's gradient; the actual
    # update below is applied by hand so it can be clamped and projected.
    optimizer_s = optim.SGD([x_var], lr=lr)
    target_l = torch.LongTensor([random_label(true_label, dataset=dataset)]).cuda()
    # Plain-int view of the target for logit indexing.  Extracted once here:
    # doing it inside the loop overwrote the tensor with an int and made the
    # second iteration fail when indexing that int.
    target_idx = target_l[0].item()
    counter = 0
    while model(transform(x_var.clone(), dataset=dataset)).data.max(1, keepdim=True)[1][0].item() == true_label:
        optimizer_s.zero_grad()
        output = model(transform(x_var, dataset=dataset))
        if use_margin:
            _, top2_1 = output.data.cpu().topk(2)
            # Strongest class other than the target: take the top-1 unless
            # it already is the target, in which case take the runner-up.
            argmax11 = top2_1[0][0]
            if argmax11 == target_idx:
                argmax11 = top2_1[0][1]
            loss = (output[0][argmax11] - output[0][target_idx] + margin).clamp(min=0)
        else:
            loss = F.cross_entropy(output, target_l)
        loss.backward()

        # Gradient step, kept inside the [0, 1] value range ...
        x_var.data = torch.clamp(x_var - lr * x_var.grad.data, min=0, max=1)
        # ... then projected back to within t_radius of the original image.
        x_var.data = torch.clamp(x_var - img, min=-t_radius, max=t_radius) + img
        counter += 1
        if counter >= cap:
            break
    return counter
Ejemplo n.º 2
0
def CW(model,
       img,
       dataset='imagenet',
       allstep=30,
       lr=0.03,
       radius=0.1,
       margin=20.0,
       lbd=2,
       setting='white',
       noise_radius=0.1,
       targeted_lr=0.005,
       targeted_radius=0.03,
       untargeted_lr=0.1,
       untargeted_radius=0.03):
    """Optimize a perturbed copy of ``img`` with margin (hinge) losses.

    A random target label is drawn once and a copy of ``img`` is optimized
    with Adam for ``allstep`` steps.  After every step the result is clamped
    to ``[0, 1]`` and projected to within ``radius`` of ``img``.

    In the ``'gray'`` setting only the targeted margin loss on the current
    image is minimized.  In the ``'white'`` setting the objective is
    ``lbd * loss1 + loss2 + loss3 - loss4`` where

    * ``loss1`` is the targeted margin loss on the current image,
    * ``loss2`` is the L1 distance between the softmax outputs of a noisy
      copy (``noisy``) and of the current image,
    * ``loss3`` is a margin loss towards a second random label, evaluated on
      the output of ``t_attack``,
    * ``loss4`` is a margin loss towards the target label, evaluated on the
      output of ``u_attack`` (subtracted, i.e. maximized).

    Args:
        model: Network called on ``transform(x, dataset=dataset)``; it is
            switched to eval mode.
        img: Input image tensor; cloned and moved to CUDA.
            NOTE(review): ``img`` is also used directly in the projection
            step, so it presumably already lives on the CUDA device.
        dataset: Forwarded to ``transform``, ``random_label``, ``t_attack``
            and ``u_attack``.
        allstep: Number of optimization steps.
        lr: Adam learning rate.
        radius: Maximum absolute per-element deviation from ``img``.
        margin: Margin added inside every hinge loss.
        lbd: Weight of ``loss1`` in the ``'white'`` setting.
        setting: ``'white'`` or ``'gray'``.
        noise_radius: Forwarded to ``noisy``.
        targeted_lr: Forwarded to ``t_attack``.
        targeted_radius: Forwarded to ``t_attack``.
        untargeted_lr: Forwarded to ``u_attack``.
        untargeted_radius: Forwarded to ``u_attack``.

    Returns:
        The optimized image variable (on CUDA, ``requires_grad=True``).

    Raises:
        ValueError: If ``setting`` is neither ``'white'`` nor ``'gray'``
            (raised during the first optimization step).
    """
    model.eval()
    x_var = torch.autograd.Variable(img.clone().cuda(), requires_grad=True)
    true_label = model(transform(x_var.clone(), dataset=dataset)).data.max(
        1, keepdim=True)[1][0].item()
    optimizer = optim.Adam([x_var], lr=lr)
    target_label = random_label(true_label, dataset=dataset)

    for _ in range(allstep):
        optimizer.zero_grad()
        total_loss = 0

        output_ori = model(transform(x_var, dataset=dataset))
        _, top2_1 = output_ori.data.cpu().topk(2)
        # Strongest class other than the target.
        argmax11 = top2_1[0][0]
        if argmax11 == target_label:
            argmax11 = top2_1[0][1]
        loss1 = (output_ori[0][argmax11] - output_ori[0][target_label] +
                 margin).clamp(min=0)

        if setting == 'white':
            total_loss += lbd * loss1  # loss of original image, should descend

            noise_var = noisy(x_var, noise_radius)
            output_noise = model(transform(noise_var, dataset=dataset))
            # dim=1 is the class axis of the (batch, classes) logits indexed
            # above; it is spelled out because the implicit-dim form is
            # deprecated.
            loss2 = torch.norm(
                F.softmax(output_noise, dim=1) - F.softmax(output_ori, dim=1),
                1)  # l1(noisy_img-origin_img), should descend
            total_loss += loss2

            new_tl = random_label(target_label, dataset=dataset)
            new_target = torch.LongTensor([new_tl]).cuda()
            t_attack_var = t_attack(model, x_var, new_target, dataset, 1,
                                    targeted_lr,
                                    targeted_radius)  # 1 step t_attack
            output_t_attack = model(transform(t_attack_var, dataset=dataset))
            _, top2_3 = output_t_attack.data.cpu().topk(2)
            argmax13 = top2_3[0][0]
            if argmax13 == new_tl:
                argmax13 = top2_3[0][1]
            # 1 step of targeted attack image, should be new_target, descend
            loss3 = (output_t_attack[0][argmax13] -
                     output_t_attack[0][new_tl] + margin).clamp(min=0)
            total_loss += loss3  # loss of sink image, should descend

            # 1 step u_attack, if you want to do white box attack for
            # inception, then you will need to change 0.1 to 3 here
            u_attack_var = u_attack(
                model, x_var, dataset, 1, untargeted_lr, untargeted_radius)
            output_u_attack = model(transform(u_attack_var, dataset=dataset))
            _, top2_4 = output_u_attack.data.cpu().topk(2)
            # NOTE(review): unlike loss1/loss3 this starts from the
            # runner-up and falls back to the top-1 -- kept as in the
            # original; confirm whether this asymmetry is intentional.
            argmax14 = top2_4[0][1]
            if argmax14 == target_label:
                argmax14 = top2_4[0][0]
            # 1 step of u_targeted attack, should be away from target_l,
            # ascend
            loss4 = (output_u_attack[0][argmax14] -
                     output_u_attack[0][target_label] + margin).clamp(min=0)
            total_loss -= loss4

        elif setting == 'gray':
            total_loss += loss1

        else:
            # Raising a bare string is itself a TypeError in Python 3; raise
            # a real exception so the message reaches the caller.
            raise ValueError("attack setting is not supported")

        total_loss.backward()
        optimizer.step()
        # Keep pixel values in [0, 1], then stay within `radius` of `img`.
        x_var.data = torch.clamp(torch.clamp(x_var, min=0, max=1) - img,
                                 min=-radius,
                                 max=radius) + img
    return x_var
Ejemplo n.º 3
0
def PGD(model,
        img,
        dataset='imagenet',
        allstep=30,
        lr=0.03,
        radius=0.1,
        lbd=2,
        setting='white',
        noise_radius=0.1,
        targeted_lr=0.005,
        targeted_radius=0.03,
        untargeted_lr=0.1,
        untargeted_radius=0.03):
    """Optimize a perturbed copy of ``img`` with cross-entropy losses.

    A random target label is drawn once, a target distribution is built from
    the model's softmax output on the unmodified image via
    ``target_distribution``, and a copy of ``img`` is optimized with Adam
    for ``allstep`` steps.  After every step the result is clamped to
    ``[0, 1]`` and projected to within ``radius`` of ``img``.

    In the ``'gray'`` setting only ``loss1`` (``cross_entropy`` between the
    current output and the target distribution) is minimized.  In the
    ``'white'`` setting the objective is
    ``lbd * loss1 + loss2 + loss3 - loss4`` where

    * ``loss2`` is the L1 distance between the softmax outputs of a noisy
      copy (``noisy``) and of the current image,
    * ``loss3`` is ``F.cross_entropy`` towards a second random label,
      evaluated on the output of ``t_attack``,
    * ``loss4`` is ``F.cross_entropy`` towards the target label, evaluated
      on the output of ``u_attack`` (subtracted, i.e. maximized).

    Args:
        model: Network called on ``transform(x, dataset=dataset)``; it is
            switched to eval mode.
        img: Input image tensor; cloned and moved to CUDA.
            NOTE(review): ``img`` is also used directly in the projection
            step, so it presumably already lives on the CUDA device.
        dataset: Forwarded to ``transform``, ``random_label``, ``t_attack``
            and ``u_attack``.
        allstep: Number of optimization steps.
        lr: Adam learning rate.
        radius: Maximum absolute per-element deviation from ``img``.
        lbd: Weight of ``loss1`` in the ``'white'`` setting.
        setting: ``'white'`` or ``'gray'``.
        noise_radius: Forwarded to ``noisy``.
        targeted_lr: Forwarded to ``t_attack``.
        targeted_radius: Forwarded to ``t_attack``.
        untargeted_lr: Forwarded to ``u_attack``.
        untargeted_radius: Forwarded to ``u_attack``.

    Returns:
        The optimized image variable (on CUDA, ``requires_grad=True``).

    Raises:
        ValueError: If ``setting`` is neither ``'white'`` nor ``'gray'``
            (raised during the first optimization step).
    """
    model.eval()
    x_var = torch.autograd.Variable(img.clone().cuda(), requires_grad=True)
    true_label = model(transform(x_var.clone(), dataset=dataset)).data.max(
        1, keepdim=True)[1][0].item()
    # dim=1 is the class axis of the (batch, classes) logits (the same axis
    # the max above is taken over); spelled out because the implicit-dim
    # form of softmax is deprecated.
    original_softmax = F.softmax(
        model(transform(x_var.clone(), dataset=dataset)), dim=1).data
    optimizer = optim.Adam([x_var], lr=lr)
    target_label = random_label(true_label, dataset=dataset)
    target_l = torch.LongTensor([target_label]).cuda()
    target_dist = target_distribution(original_softmax, target_label)

    for _ in range(allstep):

        optimizer.zero_grad()
        total_loss = 0

        output_ori = model(transform(x_var, dataset=dataset))
        loss1 = cross_entropy(
            output_ori, target_dist)  # loss of original image, should descend

        if setting == 'white':
            total_loss += lbd * loss1

            noise_var = noisy(x_var, noise_radius)
            output_noise = model(transform(noise_var, dataset=dataset))
            loss2 = torch.norm(
                F.softmax(output_noise, dim=1) - F.softmax(output_ori, dim=1),
                1)  # l1(noisy_img-origin_img), should descend
            total_loss += loss2

            new_target = torch.LongTensor(
                [random_label(target_label, dataset=dataset)]).cuda()
            t_attack_var = t_attack(model, x_var, new_target, dataset, 1,
                                    targeted_lr,
                                    targeted_radius)  # 1 step t_attack
            output_t_attack = model(transform(t_attack_var, dataset=dataset))
            # 1 step of targeted attack image, should be new_target, descend
            loss3 = F.cross_entropy(output_t_attack, new_target)
            total_loss += loss3

            # 1 step u_attack, if you want to do white box attack for
            # inception, then you will need to change 0.1 to 3 here
            u_attack_var = u_attack(
                model, x_var, dataset, 1, untargeted_lr, untargeted_radius)
            output_u_attack = model(transform(u_attack_var, dataset=dataset))
            # 1 step of u_targeted attack, should be away from target_l,
            # ascend
            loss4 = F.cross_entropy(output_u_attack, target_l)
            total_loss -= loss4

        elif setting == 'gray':
            total_loss += loss1

        else:
            # Raising a bare string is itself a TypeError in Python 3; raise
            # a real exception so the message reaches the caller.
            raise ValueError("attack setting is not supported")

        total_loss.backward()
        optimizer.step()
        # Keep pixel values in [0, 1], then stay within `radius` of `img`.
        x_var.data = torch.clamp(torch.clamp(x_var, min=0, max=1) - img,
                                 min=-radius,
                                 max=radius) + img

    return x_var