Code example #1
import os
import time

import numpy as np
import matplotlib.pyplot as plt

# `generate` and `error_rate` are assumed to be provided by the project's
# attack-generation and evaluation utilities.


def generate_ae(model,
                data,
                labels,
                attack_configs,
                save=False,
                output_dir="../../task1_data/"):
    """
    Generate adversarial examples
    :param model: WeakDefense. The targeted model.
    :param data: array. The benign samples to generate adversarial examples for.
    :param labels: array or list. The true labels.
    :param attack_configs: dictionary. Attacks and their corresponding settings.
    :param save: boolean. True to save the generated adversarial examples.
    :param output_dir: str or path. Location to save the adversarial examples.
        It cannot be None when save is True.
    :return: None.
    """
    img_rows, img_cols = data.shape[1], data.shape[2]
    num_attacks = attack_configs.get("num_attacks")
    data_loader = (data, labels)

    if len(labels.shape) > 1:
        labels = np.asarray([np.argmax(p) for p in labels])

    # generate adversarial examples, one attack configuration at a time
    for attack_id in range(num_attacks):
        key = "configs{}".format(attack_id)
        data_adv = generate(model=model,
                            data_loader=data_loader,
                            attack_args=attack_configs.get(key))
        # predict the adversarial examples
        predictions = model.predict(data_adv)
        predictions = np.asarray([np.argmax(p) for p in predictions])

        err = error_rate(y_pred=predictions, y_true=labels)
        print(">>> error rate:", err)

        # plotting some examples
        num_plotting = min(data.shape[0], 5)
        for i in range(num_plotting):
            img = data_adv[i].reshape((img_rows, img_cols))
            plt.imshow(img, cmap='gray')
            title = '{}: {}->{}'.format(
                attack_configs.get(key).get("description"), labels[i],
                predictions[i])
            plt.title(title)
            plt.show()
            plt.close()

        # save the adversarial examples
        if save:
            if output_dir is None:
                raise ValueError("output_dir cannot be None when save is True.")
            # name the file with a monotonic timestamp to avoid collisions
            file = os.path.join(output_dir, "{}.npy".format(time.monotonic()))
            print("Save the adversarial examples to file [{}].".format(file))
            np.save(file, data_adv)
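For reference, the dictionary read by generate_ae has a "num_attacks" entry plus per-attack entries keyed "configs0", "configs1", ... each carrying a "description" and whatever settings `generate` expects. The sketch below is only illustrative: the attack names and eps values are assumptions, not part of the original code.

# hypothetical attack_configs structure; only the keys mirror what
# generate_ae reads above, the concrete settings are placeholders
attack_configs = {
    "num_attacks": 2,
    "configs0": {"description": "FGSM, eps=0.1", "eps": 0.1},
    "configs1": {"description": "PGD, eps=0.1", "eps": 0.1},
}

With such a dictionary, generate_ae crafts one batch of adversarial examples per entry, prints the error rate on each batch, plots a few samples, and (when save=True) writes each batch to a timestamped .npy file under output_dir.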
Code example #2
def adv_train(model, attack, device, train_loader, optimizer, epoch):
    """Run one epoch of adversarial training: the objective is the sum of the
    loss on the clean batch and the loss on its adversarial counterpart."""
    model.train()
    train_loss = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        # forward pass on the clean batch
        output = model(data)
        # craft adversarial examples for the same batch and run them through the model
        adv_output = model(attack.generate(model, data, target))
        # clean loss + adversarial loss (attack.lf is the attack's loss function)
        loss = attack.lf(output, target) + attack.lf(adv_output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)

    print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, train_loss))
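adv_train only relies on two members of the attack object, both visible above: generate(model, data, target), which returns perturbed inputs, and lf(output, target), the loss function used for both terms. A skeletal FGSM-style wrapper satisfying that interface might look like the following; the class name, epsilon value, and choice of F.nll_loss are illustrative assumptions, not the original attack implementation.

import torch
import torch.nn.functional as F


class FGSMAttack:
    """Illustrative attack wrapper exposing the interface adv_train expects."""

    def __init__(self, epsilon=0.1, loss_fn=F.nll_loss):
        self.epsilon = epsilon
        self.lf = loss_fn  # loss function shared by clean and adversarial terms

    def generate(self, model, data, target):
        # single-step FGSM: move the input along the sign of the loss gradient
        # (no input-range clamping here; add it if inputs are bounded)
        data = data.clone().detach().requires_grad_(True)
        loss = self.lf(model(data), target)
        grad = torch.autograd.grad(loss, data)[0]
        return (data + self.epsilon * grad.sign()).detach()

# hypothetical call: adv_train(model, FGSMAttack(), device, train_loader, optimizer, epoch=1)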
Code example #3
import random

import torch
import torch.nn.functional as F


def adv_guide_pgd_train(model, attack, device, train_loader, guide_sets,
                        optimizer, epoch, beta, gamma, epsilon, weight_decay,
                        gradient_decay):
    # note: weight_decay and gradient_decay are accepted but not used below
    model.train()
    def aux_targets_gen(target):
        # for each sample, collect the 9 labels that differ from its true
        # label, in random order; the result has shape (9, batch_size)
        aux_target = torch.empty((9, 0), dtype=torch.long)
        for i in range(len(target)):
            target_list = list(range(10))
            target_list.remove(target[i].item())
            random.shuffle(target_list)
            target_tensor = torch.tensor(target_list).unsqueeze(dim=1)
            aux_target = torch.cat((aux_target, target_tensor), dim=-1)
        return aux_target

    def guide_sample(datasets, adv_pred):
        # for every class index in adv_pred, draw one random guide sample of
        # that class and stack the results into a batch
        def sample(dataset):
            idx = random.randint(0, len(dataset) - 1)
            return dataset[idx]

        databatch = []
        labels = []
        for pred in adv_pred:
            data, target = sample(datasets[pred])
            databatch.append(data.unsqueeze(dim=0))
            labels.append(target)

        return torch.cat(databatch, dim=0), torch.tensor(labels)

    loss_sum = 0.0
    train_loss_sum = 0.0
    guided_loss_sum = 0.0
    adv_loss_sum = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)

        # for every sample, build the 9 auxiliary target labels (all labels
        # other than the true one) and craft one perturbation per target
        aux_targets = aux_targets_gen(target)
        guided_loss = 0
        adv_loss = 0
        for target_label in aux_targets:
            target_label = target_label.to(device)
            data_copy = data.clone().detach().requires_grad_(True)
            output_copy = model(data_copy)
            # loss toward the auxiliary target; the negative gradient is the
            # direction that pushes the input toward that target class
            L1 = F.nll_loss(output_copy, target_label)
            adv_pertur = -torch.autograd.grad(L1, data_copy,
                                              create_graph=True)[0]

            # normalize the adversarial perturbation: rescale each
            # (sample, channel) slice to [-1, 1], then scale by epsilon;
            # the mid/half-range values are treated as constants
            adv_max = torch.max(torch.max(adv_pertur, dim=-1)[0], dim=-1)[0]
            adv_min = torch.min(torch.min(adv_pertur, dim=-1)[0], dim=-1)[0]
            adv_mid = ((adv_max + adv_min) / 2)[..., None, None].detach()
            adv_half = ((adv_max - adv_min) / 2)[..., None, None].detach()
            adv_norm = epsilon * (adv_pertur - adv_mid) / adv_half
            adv_data = data.clone().detach() + adv_norm

            guide_data, _ = guide_sample(guide_sets, target_label)
            guide_data = guide_data.to(device)

            # guided loss: pull the perturbed inputs toward real samples of the
            # target classes; adversarial loss: loss of the model on adversarial
            # examples crafted against the true labels
            adv_output = model(attack.generate(model, data, target))
            guided_loss = F.mse_loss(adv_data, guide_data) + guided_loss
            adv_loss = attack.lf(adv_output, target) + adv_loss

        # total loss: convex combination of clean, guided, and adversarial terms
        train_loss = F.nll_loss(output, target)
        loss = (1 - beta -
                gamma) * train_loss + beta * guided_loss + gamma * adv_loss
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
        train_loss_sum += train_loss.item()
        guided_loss_sum += guided_loss.item()
        adv_loss_sum += adv_loss.item()

    loss_sum /= len(train_loader)
    train_loss_sum /= len(train_loader)
    guided_loss_sum /= len(train_loader)
    adv_loss_sum /= len(train_loader)

    print(
        'Train Epoch: {} \tLoss: {:.6f}, Training Loss: {:.6f}, Guided Loss: {:.6f}, Adv Loss: {:.6f}'
        .format(epoch, loss_sum, train_loss_sum, guided_loss_sum,
                adv_loss_sum))
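guide_sample above treats guide_sets as something indexable by class label whose entries yield (data, target) pairs, i.e. one sub-dataset per class. One possible way to build such a structure is sketched below; the helper name build_guide_sets and the use of torchvision's MNIST are illustrative assumptions, not part of the original code.

import torch
from torch.utils.data import Subset
from torchvision import datasets, transforms


def build_guide_sets(dataset, num_classes=10):
    # one Subset per class, indexable as guide_sets[label][i] -> (data, target)
    targets = torch.as_tensor(dataset.targets)
    return [Subset(dataset, torch.where(targets == c)[0].tolist())
            for c in range(num_classes)]


# example: per-class guide sets from MNIST (illustrative dataset choice)
mnist = datasets.MNIST("./data", train=True, download=True,
                       transform=transforms.ToTensor())
guide_sets = build_guide_sets(mnist)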